diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index d1015630b05d1..261f7e49e5c8c 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -558,6 +558,7 @@ BEGIN_TWO_BYTE_PACK() class LoadSDNodeBitfields { friend class LoadSDNode; + friend class AtomicSDNode; friend class VPLoadSDNode; friend class VPStridedLoadSDNode; friend class MaskedLoadSDNode; @@ -1475,6 +1476,16 @@ class AtomicSDNode : public MemSDNode { MMO->isAtomic()) && "then why are we using an AtomicSDNode?"); } + void setExtensionType(ISD::LoadExtType ETy) { + assert(getOpcode() == ISD::ATOMIC_LOAD && "Only used for atomic loads."); + LoadSDNodeBits.ExtTy = ETy; + } + + ISD::LoadExtType getExtensionType() const { + assert(getOpcode() == ISD::ATOMIC_LOAD && "Only used for atomic loads."); + return static_cast(LoadSDNodeBits.ExtTy); + } + const SDValue &getBasePtr() const { return getOpcode() == ISD::ATOMIC_STORE ? getOperand(2) : getOperand(1); } diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index d7bf8c35ee107..ea3520835fa07 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -318,7 +318,7 @@ def SDTAtomicStore : SDTypeProfile<0, 2, [ SDTCisInt<0>, SDTCisPtrTy<1> ]>; def SDTAtomicLoad : SDTypeProfile<1, 1, [ - SDTCisInt<0>, SDTCisPtrTy<1> + SDTCisPtrTy<1> ]>; class SDCallSeqStart constraints> : diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 52e12cf364066..93ce9c22af552 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -349,6 +349,26 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { N->getMemoryVT(), ResVT, N->getChain(), N->getBasePtr(), N->getMemOperand()); + if (N->getOpcode() == ISD::ATOMIC_LOAD) { + ISD::LoadExtType ETy = cast(N)->getExtensionType(); + if (ETy == ISD::NON_EXTLOAD) { + switch (TLI.getExtendForAtomicOps()) { + case ISD::SIGN_EXTEND: + ETy = ISD::SEXTLOAD; + break; + case ISD::ZERO_EXTEND: + ETy = ISD::ZEXTLOAD; + break; + case ISD::ANY_EXTEND: + ETy = ISD::EXTLOAD; + break; + default: + llvm_unreachable("Invalid atomic op extension"); + } + } + cast(Res)->setExtensionType(ETy); + } + // Legalize the chain result - switch anything that used the old chain to // use the new one. 
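(Annotation, not part of the patch: the switch above is the standard mapping from TargetLowering::getExtendForAtomicOps() to a load-extension kind, applied when a non-extending atomic load has its result type promoted. A minimal standalone sketch of that rule follows; getExtendForAtomicOps() is the existing TargetLowering hook, while the helper name itself is invented for illustration.)

    static ISD::LoadExtType getExtTypeForAtomicExtend(const TargetLowering &TLI) {
      // Translate the target's preferred extension for atomic results into the
      // LoadExtType tag carried by the promoted ATOMIC_LOAD node.
      switch (TLI.getExtendForAtomicOps()) {
      case ISD::SIGN_EXTEND: return ISD::SEXTLOAD;
      case ISD::ZERO_EXTEND: return ISD::ZEXTLOAD;
      case ISD::ANY_EXTEND:  return ISD::EXTLOAD;
      default:               llvm_unreachable("Invalid atomic op extension");
      }
    }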
ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index fc2517ea7ef08..6f6ed4bd45027 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4070,6 +4070,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, if (Op.getResNo() == 0) { if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND) Known.Zero.setBitsFrom(MemBits); + else if (Op->getOpcode() == ISD::ATOMIC_LOAD && + cast(Op)->getExtensionType() == ISD::ZEXTLOAD) + Known.Zero.setBitsFrom(MemBits); } break; } @@ -4875,6 +4878,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return VTBits - Tmp + 1; if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND) return VTBits - Tmp; + if (Op->getOpcode() == ISD::ATOMIC_LOAD) { + ISD::LoadExtType ETy = cast(Op)->getExtensionType(); + if (ETy == ISD::SEXTLOAD) + return VTBits - Tmp + 1; + if (ETy == ISD::ZEXTLOAD) + return VTBits - Tmp; + } } break; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index fa71adc8da3f3..20375a0f92b23 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -841,6 +841,18 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { } else if (const MemSDNode *M = dyn_cast(this)) { OS << "<"; printMemOperand(OS, *M->getMemOperand(), G); + if (auto *A = dyn_cast(M)) + if (A->getOpcode() == ISD::ATOMIC_LOAD) { + bool doExt = true; + switch (A->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << ", anyext"; break; + case ISD::SEXTLOAD: OS << ", sext"; break; + case ISD::ZEXTLOAD: OS << ", zext"; break; + } + if (doExt) + OS << " from " << A->getMemoryVT(); + } OS << ">"; } else if (const BlockAddressSDNode *BA = dyn_cast(this)) { diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 82da1a3c30598..6423e692d88c3 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -5045,12 +5045,12 @@ defm : TrapExtendedMnemonic<"lng", 6>; defm : TrapExtendedMnemonic<"u", 31>; // Atomic loads -def : Pat<(atomic_load_8 DForm:$src), (LBZ memri:$src)>; -def : Pat<(atomic_load_16 DForm:$src), (LHZ memri:$src)>; -def : Pat<(atomic_load_32 DForm:$src), (LWZ memri:$src)>; -def : Pat<(atomic_load_8 XForm:$src), (LBZX memrr:$src)>; -def : Pat<(atomic_load_16 XForm:$src), (LHZX memrr:$src)>; -def : Pat<(atomic_load_32 XForm:$src), (LWZX memrr:$src)>; +def : Pat<(i32 (atomic_load_8 DForm:$src)), (LBZ memri:$src)>; +def : Pat<(i32 (atomic_load_16 DForm:$src)), (LHZ memri:$src)>; +def : Pat<(i32 (atomic_load_32 DForm:$src)), (LWZ memri:$src)>; +def : Pat<(i32 (atomic_load_8 XForm:$src)), (LBZX memrr:$src)>; +def : Pat<(i32 (atomic_load_16 XForm:$src)), (LHZX memrr:$src)>; +def : Pat<(i32 (atomic_load_32 XForm:$src)), (LWZX memrr:$src)>; // Atomic stores def : Pat<(atomic_store_8 i32:$val, DForm:$ptr), (STB gprc:$val, memri:$ptr)>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td index d5a372e4dc101..5f2937d47a519 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrP10.td +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -1289,13 +1289,13 @@ let Predicates = [PCRelativeMemops] in { (PSTXVpc $XS, $ga, 0)>; // Atomic Load - def : Pat<(atomic_load_8 
(PPCmatpcreladdr PCRelForm:$ga)), + def : Pat<(i32 (atomic_load_8 (PPCmatpcreladdr PCRelForm:$ga))), (PLBZpc $ga, 0)>; - def : Pat<(atomic_load_16 (PPCmatpcreladdr PCRelForm:$ga)), + def : Pat<(i32 (atomic_load_16 (PPCmatpcreladdr PCRelForm:$ga))), (PLHZpc $ga, 0)>; - def : Pat<(atomic_load_32 (PPCmatpcreladdr PCRelForm:$ga)), + def : Pat<(i32 (atomic_load_32 (PPCmatpcreladdr PCRelForm:$ga))), (PLWZpc $ga, 0)>; - def : Pat<(atomic_load_64 (PPCmatpcreladdr PCRelForm:$ga)), + def : Pat<(i64 (atomic_load_64 (PPCmatpcreladdr PCRelForm:$ga))), (PLDpc $ga, 0)>; // Atomic Store @@ -2347,10 +2347,10 @@ let Predicates = [PrefixInstrs] in { def : Pat<(store f64:$FRS, PDForm:$dst), (PSTFD $FRS, memri34:$dst)>; // Atomic Load - def : Pat<(atomic_load_8 PDForm:$src), (PLBZ memri34:$src)>; - def : Pat<(atomic_load_16 PDForm:$src), (PLHZ memri34:$src)>; - def : Pat<(atomic_load_32 PDForm:$src), (PLWZ memri34:$src)>; - def : Pat<(atomic_load_64 PDForm:$src), (PLD memri34:$src)>; + def : Pat<(i32 (atomic_load_8 PDForm:$src)), (PLBZ memri34:$src)>; + def : Pat<(i32 (atomic_load_16 PDForm:$src)), (PLHZ memri34:$src)>; + def : Pat<(i32 (atomic_load_32 PDForm:$src)), (PLWZ memri34:$src)>; + def : Pat<(i64 (atomic_load_64 PDForm:$src)), (PLD memri34:$src)>; // Atomic Store def : Pat<(atomic_store_8 i32:$RS, PDForm:$dst), (PSTB $RS, memri34:$dst)>; diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 815eca1240d82..deaf3dcaeb92a 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -344,6 +344,9 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { // requirements for a PC-relative access. bool storeLoadIsAligned(SDNode *N) const; + // Return the load extension type of a load or atomic load. + ISD::LoadExtType getLoadExtType(SDNode *N) const; + // Try to expand a boolean SELECT_CCMASK using an IPM sequence. SDValue expandSelectBoolean(SDNode *Node); @@ -1507,15 +1510,17 @@ bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N, bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const { - auto *MemAccess = cast(N); + auto *MemAccess = cast(N); + auto *LdSt = dyn_cast(MemAccess); TypeSize StoreSize = MemAccess->getMemoryVT().getStoreSize(); SDValue BasePtr = MemAccess->getBasePtr(); MachineMemOperand *MMO = MemAccess->getMemOperand(); assert(MMO && "Expected a memory operand."); // The memory access must have a proper alignment and no index register. + // Only load and store nodes have the offset operand (atomic loads do not). if (MemAccess->getAlign().value() < StoreSize || - !MemAccess->getOffset().isUndef()) + (LdSt && !LdSt->getOffset().isUndef())) return false; // The MMO must not have an unaligned offset. @@ -1545,6 +1550,17 @@ bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const { return true; } +ISD::LoadExtType SystemZDAGToDAGISel::getLoadExtType(SDNode *N) const { + ISD::LoadExtType ETy; + if (auto *L = dyn_cast(N)) + ETy = L->getExtensionType(); + else if (auto *AL = dyn_cast(N)) + ETy = AL->getExtensionType(); + else + llvm_unreachable("Unkown load node type."); + return ETy; +} + void SystemZDAGToDAGISel::Select(SDNode *Node) { // If we have a custom node, we already have selected! if (Node->isMachineOpcode()) { @@ -1742,6 +1758,26 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) { } break; } + + case ISD::ATOMIC_STORE: { + auto *AtomOp = cast(Node); + // Replace the atomic_store with a regular store and select it. 
This is + // ok since we know all store instructions <= 8 bytes are atomic, and the + // 16 byte case is already handled during lowering. + StoreSDNode *St = cast(CurDAG->getTruncStore( + AtomOp->getChain(), SDLoc(AtomOp), AtomOp->getVal(), + AtomOp->getBasePtr(), AtomOp->getMemoryVT(), AtomOp->getMemOperand())); + assert(St->getMemOperand()->isAtomic() && "Broken MMO."); + SDNode *Chain = St; + // We have to enforce sequential consistency by performing a + // serialization operation after the store. + if (AtomOp->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent) + Chain = CurDAG->getMachineNode(SystemZ::Serialize, SDLoc(AtomOp), + MVT::Other, SDValue(Chain, 0)); + ReplaceNode(Node, Chain); + SelectCode(St); + return; + } } SelectCode(Node); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 3b85a6ac0371e..887c35a7ba240 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -194,11 +194,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UADDO_CARRY, VT, Custom); setOperationAction(ISD::USUBO_CARRY, VT, Custom); - // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and - // stores, putting a serialization instruction after the stores. - setOperationAction(ISD::ATOMIC_LOAD, VT, Custom); - setOperationAction(ISD::ATOMIC_STORE, VT, Custom); - // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are // available, or if the operand is constant. setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); @@ -920,6 +915,22 @@ bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const return false; } +TargetLowering::AtomicExpansionKind +SystemZTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const { + // Lower fp128 the same way as i128. + if (LI->getType()->isFP128Ty()) + return AtomicExpansionKind::CastToInteger; + return AtomicExpansionKind::None; +} + +TargetLowering::AtomicExpansionKind +SystemZTargetLowering::shouldCastAtomicStoreInIR(StoreInst *SI) const { + // Lower fp128 the same way as i128. + if (SI->getValueOperand()->getType()->isFP128Ty()) + return AtomicExpansionKind::CastToInteger; + return AtomicExpansionKind::None; +} + TargetLowering::AtomicExpansionKind SystemZTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { // Don't expand subword operations as they require special treatment. @@ -4503,40 +4514,14 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); } -// Op is an atomic load. Lower it into a normal volatile load. -SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, - SelectionDAG &DAG) const { - auto *Node = cast(Op.getNode()); - if (Node->getMemoryVT() == MVT::i128) { - // Use same code to handle both legal and non-legal i128 types. - SmallVector Results; - LowerOperationWrapper(Node, Results, DAG); - return DAG.getMergeValues(Results, SDLoc(Op)); - } - return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(), - Node->getChain(), Node->getBasePtr(), - Node->getMemoryVT(), Node->getMemOperand()); -} - -// Op is an atomic store. Lower it into a normal volatile store. 
-SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op, - SelectionDAG &DAG) const { +SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op, + SelectionDAG &DAG) const { auto *Node = cast(Op.getNode()); - if (Node->getMemoryVT() == MVT::i128) { - // Use same code to handle both legal and non-legal i128 types. - SmallVector Results; - LowerOperationWrapper(Node, Results, DAG); - return DAG.getMergeValues(Results, SDLoc(Op)); - } - SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(), - Node->getBasePtr(), Node->getMemoryVT(), - Node->getMemOperand()); - // We have to enforce sequential consistency by performing a - // serialization operation after the store. - if (Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent) - Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), - MVT::Other, Chain), 0); - return Chain; + assert(Node->getMemoryVT() == MVT::i128 && "Only custom lowering i128."); + // Use same code to handle both legal and non-legal i128 types. + SmallVector Results; + LowerOperationWrapper(Node, Results, DAG); + return DAG.getMergeValues(Results, SDLoc(Op)); } // Prepare for a Compare And Swap for a subword operation. This needs to be @@ -5662,6 +5647,9 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const { if (Op.getOpcode() == ISD::LOAD && cast(Op)->isUnindexed()) return true; + if (auto *AL = dyn_cast(Op)) + if (AL->getOpcode() == ISD::ATOMIC_LOAD) + return true; if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV) return true; return false; @@ -6138,9 +6126,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, case ISD::ATOMIC_SWAP: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); case ISD::ATOMIC_STORE: - return lowerATOMIC_STORE(Op, DAG); case ISD::ATOMIC_LOAD: - return lowerATOMIC_LOAD(Op, DAG); + return lowerATOMIC_LDST_I128(Op, DAG); case ISD::ATOMIC_LOAD_ADD: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD); case ISD::ATOMIC_LOAD_SUB: @@ -6587,6 +6574,27 @@ SDValue SystemZTargetLowering::combineTruncateExtract( return SDValue(); } +// Replace ALoad with a new ATOMIC_LOAD with a result that is extended to VT +// per ETy. +static SDValue extendAtomicLoad(AtomicSDNode *ALoad, EVT VT, SelectionDAG &DAG, + ISD::LoadExtType ETy) { + if (VT.getSizeInBits() > 64) + return SDValue(); + EVT OrigVT = ALoad->getValueType(0); + assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider."); + EVT MemoryVT = ALoad->getMemoryVT(); + auto *NewALoad = dyn_cast(DAG.getAtomic( + ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(), + ALoad->getBasePtr(), ALoad->getMemOperand())); + NewALoad->setExtensionType(ETy); + DAG.ReplaceAllUsesOfValueWith( + SDValue(ALoad, 0), + DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0))); + // Update the chain uses. + DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1)); + return SDValue(NewALoad, 0); +} + SDValue SystemZTargetLowering::combineZERO_EXTEND( SDNode *N, DAGCombinerInfo &DCI) const { // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2') @@ -6611,6 +6619,13 @@ SDValue SystemZTargetLowering::combineZERO_EXTEND( return NewSelect; } } + + // Fold into ATOMIC_LOAD unless it is already sign extending. 
+ if (auto *ALoad = dyn_cast(N0)) + if (ALoad->getOpcode() == ISD::ATOMIC_LOAD && + ALoad->getExtensionType() != ISD::SEXTLOAD) + return extendAtomicLoad(ALoad, VT, DAG, ISD::ZEXTLOAD); + return SDValue(); } @@ -6662,6 +6677,13 @@ SDValue SystemZTargetLowering::combineSIGN_EXTEND( } } } + + // Fold into ATOMIC_LOAD unless it is already zero extending. + if (auto *ALoad = dyn_cast(N0)) + if (ALoad->getOpcode() == ISD::ATOMIC_LOAD && + ALoad->getExtensionType() != ISD::ZEXTLOAD) + return extendAtomicLoad(ALoad, VT, DAG, ISD::SEXTLOAD); + return SDValue(); } diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index baf4ba4165487..406a13b9281ca 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -474,6 +474,8 @@ class SystemZTargetLowering : public TargetLowering { return VT != MVT::f64; } bool hasInlineStackProbe(const MachineFunction &MF) const override; + AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override; + AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override; AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override; bool isLegalICmpImmediate(int64_t Imm) const override; @@ -692,8 +694,7 @@ class SystemZTargetLowering : public TargetLowering { SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_LDST_I128(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG, unsigned Opcode) const; SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td index 6e67425c1e788..f4b5aeaebef92 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -129,8 +129,8 @@ defm LoadStoreF128 : MVCLoadStore; //===----------------------------------------------------------------------===// let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1 in { - defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>; - defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>; + defm LE : UnaryRXPair<"le", 0x78, 0xED64, z_load, FP32, 4>; + defm LD : UnaryRXPair<"ld", 0x68, 0xED65, z_load, FP64, 8>; // For z13 we prefer LDE over LE to avoid partial register dependencies. let isCodeGenOnly = 1 in @@ -200,14 +200,14 @@ let Predicates = [FeatureNoVectorEnhancements1] in { // Extend memory floating-point values to wider representations. 
let Uses = [FPC], mayRaiseFPException = 1 in { - def LDEB : UnaryRXE<"ldeb", 0xED04, any_extloadf32, FP64, 4>; + def LDEB : UnaryRXE<"ldeb", 0xED04, z_any_extloadf32, FP64, 4>; def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>; def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>; } let Predicates = [FeatureNoVectorEnhancements1] in { - def : Pat<(f128 (any_extloadf32 bdxaddr12only:$src)), + def : Pat<(f128 (z_any_extloadf32 bdxaddr12only:$src)), (LXEB bdxaddr12only:$src)>; - def : Pat<(f128 (any_extloadf64 bdxaddr12only:$src)), + def : Pat<(f128 (z_any_extloadf64 bdxaddr12only:$src)), (LXDB bdxaddr12only:$src)>; } @@ -430,8 +430,8 @@ let Uses = [FPC], mayRaiseFPException = 1, def ADBR : BinaryRRE<"adbr", 0xB31A, any_fadd, FP64, FP64>; def AXBR : BinaryRRE<"axbr", 0xB34A, any_fadd, FP128, FP128>; } - defm AEB : BinaryRXEAndPseudo<"aeb", 0xED0A, any_fadd, FP32, load, 4>; - defm ADB : BinaryRXEAndPseudo<"adb", 0xED1A, any_fadd, FP64, load, 8>; + defm AEB : BinaryRXEAndPseudo<"aeb", 0xED0A, any_fadd, FP32, z_load, 4>; + defm ADB : BinaryRXEAndPseudo<"adb", 0xED1A, any_fadd, FP64, z_load, 8>; } // Subtraction. @@ -441,8 +441,8 @@ let Uses = [FPC], mayRaiseFPException = 1, def SDBR : BinaryRRE<"sdbr", 0xB31B, any_fsub, FP64, FP64>; def SXBR : BinaryRRE<"sxbr", 0xB34B, any_fsub, FP128, FP128>; - defm SEB : BinaryRXEAndPseudo<"seb", 0xED0B, any_fsub, FP32, load, 4>; - defm SDB : BinaryRXEAndPseudo<"sdb", 0xED1B, any_fsub, FP64, load, 8>; + defm SEB : BinaryRXEAndPseudo<"seb", 0xED0B, any_fsub, FP32, z_load, 4>; + defm SDB : BinaryRXEAndPseudo<"sdb", 0xED1B, any_fsub, FP64, z_load, 8>; } // Multiplication. @@ -452,8 +452,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in { def MDBR : BinaryRRE<"mdbr", 0xB31C, any_fmul, FP64, FP64>; def MXBR : BinaryRRE<"mxbr", 0xB34C, any_fmul, FP128, FP128>; } - defm MEEB : BinaryRXEAndPseudo<"meeb", 0xED17, any_fmul, FP32, load, 4>; - defm MDB : BinaryRXEAndPseudo<"mdb", 0xED1C, any_fmul, FP64, load, 8>; + defm MEEB : BinaryRXEAndPseudo<"meeb", 0xED17, any_fmul, FP32, z_load, 4>; + defm MDB : BinaryRXEAndPseudo<"mdb", 0xED1C, any_fmul, FP64, z_load, 8>; } // f64 multiplication of two FP32 registers. @@ -466,7 +466,7 @@ def : Pat<(any_fmul (f64 (any_fpextend FP32:$src1)), // f64 multiplication of an FP32 register and an f32 memory. let Uses = [FPC], mayRaiseFPException = 1 in - def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>; + def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, z_load, 4>; def : Pat<(any_fmul (f64 (any_fpextend FP32:$src1)), (f64 (any_extloadf32 bdxaddr12only:$addr))), (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32), @@ -483,7 +483,7 @@ let Predicates = [FeatureNoVectorEnhancements1] in // f128 multiplication of an FP64 register and an f64 memory. 
let Uses = [FPC], mayRaiseFPException = 1 in - def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>; + def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, z_load, 8>; let Predicates = [FeatureNoVectorEnhancements1] in def : Pat<(any_fmul (f128 (any_fpextend FP64:$src1)), (f128 (any_extloadf64 bdxaddr12only:$addr))), @@ -495,8 +495,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in { def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>; def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>; - defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>; - defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>; + defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, z_load, 4>; + defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, z_load, 8>; } // Fused multiply-subtract. @@ -504,8 +504,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in { def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>; def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>; - defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>; - defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>; + defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, z_load, 4>; + defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, z_load, 8>; } // Division. @@ -514,8 +514,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in { def DDBR : BinaryRRE<"ddbr", 0xB31D, any_fdiv, FP64, FP64>; def DXBR : BinaryRRE<"dxbr", 0xB34D, any_fdiv, FP128, FP128>; - defm DEB : BinaryRXEAndPseudo<"deb", 0xED0D, any_fdiv, FP32, load, 4>; - defm DDB : BinaryRXEAndPseudo<"ddb", 0xED1D, any_fdiv, FP64, load, 8>; + defm DEB : BinaryRXEAndPseudo<"deb", 0xED0D, any_fdiv, FP32, z_load, 4>; + defm DDB : BinaryRXEAndPseudo<"ddb", 0xED1D, any_fdiv, FP64, z_load, 8>; } // Divide to integer. @@ -533,15 +533,15 @@ let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC], CCValues = 0xF in { def CDBR : CompareRRE<"cdbr", 0xB319, z_any_fcmp, FP64, FP64>; def CXBR : CompareRRE<"cxbr", 0xB349, z_any_fcmp, FP128, FP128>; - def CEB : CompareRXE<"ceb", 0xED09, z_any_fcmp, FP32, load, 4>; - def CDB : CompareRXE<"cdb", 0xED19, z_any_fcmp, FP64, load, 8>; + def CEB : CompareRXE<"ceb", 0xED09, z_any_fcmp, FP32, z_load, 4>; + def CDB : CompareRXE<"cdb", 0xED19, z_any_fcmp, FP64, z_load, 8>; def KEBR : CompareRRE<"kebr", 0xB308, z_strict_fcmps, FP32, FP32>; def KDBR : CompareRRE<"kdbr", 0xB318, z_strict_fcmps, FP64, FP64>; def KXBR : CompareRRE<"kxbr", 0xB348, z_strict_fcmps, FP128, FP128>; - def KEB : CompareRXE<"keb", 0xED08, z_strict_fcmps, FP32, load, 4>; - def KDB : CompareRXE<"kdb", 0xED18, z_strict_fcmps, FP64, load, 8>; + def KEB : CompareRXE<"keb", 0xED08, z_strict_fcmps, FP32, z_load, 4>; + def KDB : CompareRXE<"kdb", 0xED18, z_strict_fcmps, FP64, z_load, 8>; } // Test Data Class. 
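(Annotation, not part of the patch: the blanket load -> z_load renames in this file, and in the integer instruction files below, let these memory-operand patterns also fold non-extending atomic loads. A minimal C++ sketch of the acceptance test that the z_load fragment encodes, per the PatFrags definition later in SystemZOperators.td; the function name is illustrative.)

    static bool matchesZLoad(SDNode *N) {
      // Ordinary loads must be unindexed and non-extending; atomic loads match
      // only when they are not tagged as extending. Extending atomic loads are
      // covered by the z_*extload fragments instead.
      if (auto *L = dyn_cast<LoadSDNode>(N))
        return L->isUnindexed() && L->getExtensionType() == ISD::NON_EXTLOAD;
      if (auto *AL = dyn_cast<AtomicSDNode>(N))
        return AL->getOpcode() == ISD::ATOMIC_LOAD &&
               AL->getExtensionType() == ISD::NON_EXTLOAD;
      return false;
    }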
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index bb9fa0fc33ffa..3dba33b66bf4f 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -3777,7 +3777,7 @@ class BinarySI opcode, SDPatternOperator operator, Operand imm, AddressingMode mode = bdaddr12only> : InstSI { + [(store (operator (z_load mode:$BD1), imm:$I2), mode:$BD1)]> { let mayLoad = 1; let mayStore = 1; } @@ -3786,7 +3786,7 @@ class BinarySIY opcode, SDPatternOperator operator, Operand imm, AddressingMode mode = bdaddr20only> : InstSIY { + [(store (operator (z_load mode:$BD1), imm:$I2), mode:$BD1)]> { let mayLoad = 1; let mayStore = 1; } diff --git a/llvm/lib/Target/SystemZ/SystemZInstrHFP.td b/llvm/lib/Target/SystemZ/SystemZInstrHFP.td index 2e3c9932d6214..d2e05b63c6c63 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrHFP.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrHFP.td @@ -134,8 +134,8 @@ let Defs = [CC] in { def ADR : BinaryRR<"adr", 0x2A, null_frag, FP64, FP64>; def AXR : BinaryRR<"axr", 0x36, null_frag, FP128, FP128>; } - def AE : BinaryRX<"ae", 0x7A, null_frag, FP32, load, 4>; - def AD : BinaryRX<"ad", 0x6A, null_frag, FP64, load, 8>; + def AE : BinaryRX<"ae", 0x7A, null_frag, FP32, z_load, 4>; + def AD : BinaryRX<"ad", 0x6A, null_frag, FP64, z_load, 8>; } // Addition (unnormalized). @@ -144,8 +144,8 @@ let Defs = [CC] in { def AUR : BinaryRR<"aur", 0x3E, null_frag, FP32, FP32>; def AWR : BinaryRR<"awr", 0x2E, null_frag, FP64, FP64>; } - def AU : BinaryRX<"au", 0x7E, null_frag, FP32, load, 4>; - def AW : BinaryRX<"aw", 0x6E, null_frag, FP64, load, 8>; + def AU : BinaryRX<"au", 0x7E, null_frag, FP32, z_load, 4>; + def AW : BinaryRX<"aw", 0x6E, null_frag, FP64, z_load, 8>; } // Subtraction. @@ -154,8 +154,8 @@ let Defs = [CC] in { def SDR : BinaryRR<"sdr", 0x2B, null_frag, FP64, FP64>; def SXR : BinaryRR<"sxr", 0x37, null_frag, FP128, FP128>; - def SE : BinaryRX<"se", 0x7B, null_frag, FP32, load, 4>; - def SD : BinaryRX<"sd", 0x6B, null_frag, FP64, load, 8>; + def SE : BinaryRX<"se", 0x7B, null_frag, FP32, z_load, 4>; + def SD : BinaryRX<"sd", 0x6B, null_frag, FP64, z_load, 8>; } // Subtraction (unnormalized). @@ -163,8 +163,8 @@ let Defs = [CC] in { def SUR : BinaryRR<"sur", 0x3F, null_frag, FP32, FP32>; def SWR : BinaryRR<"swr", 0x2F, null_frag, FP64, FP64>; - def SU : BinaryRX<"su", 0x7F, null_frag, FP32, load, 4>; - def SW : BinaryRX<"sw", 0x6F, null_frag, FP64, load, 8>; + def SU : BinaryRX<"su", 0x7F, null_frag, FP32, z_load, 4>; + def SW : BinaryRX<"sw", 0x6F, null_frag, FP64, z_load, 8>; } // Multiplication. @@ -173,55 +173,55 @@ let isCommutable = 1 in { def MDR : BinaryRR <"mdr", 0x2C, null_frag, FP64, FP64>; def MXR : BinaryRR <"mxr", 0x26, null_frag, FP128, FP128>; } -def MEE : BinaryRXE<"mee", 0xED37, null_frag, FP32, load, 4>; -def MD : BinaryRX <"md", 0x6C, null_frag, FP64, load, 8>; +def MEE : BinaryRXE<"mee", 0xED37, null_frag, FP32, z_load, 4>; +def MD : BinaryRX <"md", 0x6C, null_frag, FP64, z_load, 8>; // Extending multiplication (f32 x f32 -> f64). def MDER : BinaryRR<"mder", 0x3C, null_frag, FP64, FP32>; -def MDE : BinaryRX<"mde", 0x7C, null_frag, FP64, load, 4>; +def MDE : BinaryRX<"mde", 0x7C, null_frag, FP64, z_load, 4>; let isAsmParserOnly = 1 in { def MER : BinaryRR<"mer", 0x3C, null_frag, FP64, FP32>; - def ME : BinaryRX<"me", 0x7C, null_frag, FP64, load, 4>; + def ME : BinaryRX<"me", 0x7C, null_frag, FP64, z_load, 4>; } // Extending multiplication (f64 x f64 -> f128). 
def MXDR : BinaryRR<"mxdr", 0x27, null_frag, FP128, FP64>; -def MXD : BinaryRX<"mxd", 0x67, null_frag, FP128, load, 8>; +def MXD : BinaryRX<"mxd", 0x67, null_frag, FP128, z_load, 8>; // Fused multiply-add. def MAER : TernaryRRD<"maer", 0xB32E, null_frag, FP32, FP32>; def MADR : TernaryRRD<"madr", 0xB33E, null_frag, FP64, FP64>; -def MAE : TernaryRXF<"mae", 0xED2E, null_frag, FP32, FP32, load, 4>; -def MAD : TernaryRXF<"mad", 0xED3E, null_frag, FP64, FP64, load, 8>; +def MAE : TernaryRXF<"mae", 0xED2E, null_frag, FP32, FP32, z_load, 4>; +def MAD : TernaryRXF<"mad", 0xED3E, null_frag, FP64, FP64, z_load, 8>; // Fused multiply-subtract. def MSER : TernaryRRD<"mser", 0xB32F, null_frag, FP32, FP32>; def MSDR : TernaryRRD<"msdr", 0xB33F, null_frag, FP64, FP64>; -def MSE : TernaryRXF<"mse", 0xED2F, null_frag, FP32, FP32, load, 4>; -def MSD : TernaryRXF<"msd", 0xED3F, null_frag, FP64, FP64, load, 8>; +def MSE : TernaryRXF<"mse", 0xED2F, null_frag, FP32, FP32, z_load, 4>; +def MSD : TernaryRXF<"msd", 0xED3F, null_frag, FP64, FP64, z_load, 8>; // Multiplication (unnormalized). def MYR : BinaryRRD<"myr", 0xB33B, null_frag, FP128, FP64>; def MYHR : BinaryRRD<"myhr", 0xB33D, null_frag, FP64, FP64>; def MYLR : BinaryRRD<"mylr", 0xB339, null_frag, FP64, FP64>; -def MY : BinaryRXF<"my", 0xED3B, null_frag, FP128, FP64, load, 8>; -def MYH : BinaryRXF<"myh", 0xED3D, null_frag, FP64, FP64, load, 8>; -def MYL : BinaryRXF<"myl", 0xED39, null_frag, FP64, FP64, load, 8>; +def MY : BinaryRXF<"my", 0xED3B, null_frag, FP128, FP64, z_load, 8>; +def MYH : BinaryRXF<"myh", 0xED3D, null_frag, FP64, FP64, z_load, 8>; +def MYL : BinaryRXF<"myl", 0xED39, null_frag, FP64, FP64, z_load, 8>; // Fused multiply-add (unnormalized). def MAYR : TernaryRRD<"mayr", 0xB33A, null_frag, FP128, FP64>; def MAYHR : TernaryRRD<"mayhr", 0xB33C, null_frag, FP64, FP64>; def MAYLR : TernaryRRD<"maylr", 0xB338, null_frag, FP64, FP64>; -def MAY : TernaryRXF<"may", 0xED3A, null_frag, FP128, FP64, load, 8>; -def MAYH : TernaryRXF<"mayh", 0xED3C, null_frag, FP64, FP64, load, 8>; -def MAYL : TernaryRXF<"mayl", 0xED38, null_frag, FP64, FP64, load, 8>; +def MAY : TernaryRXF<"may", 0xED3A, null_frag, FP128, FP64, z_load, 8>; +def MAYH : TernaryRXF<"mayh", 0xED3C, null_frag, FP64, FP64, z_load, 8>; +def MAYL : TernaryRXF<"mayl", 0xED38, null_frag, FP64, FP64, z_load, 8>; // Division. 
def DER : BinaryRR <"der", 0x3D, null_frag, FP32, FP32>; def DDR : BinaryRR <"ddr", 0x2D, null_frag, FP64, FP64>; def DXR : BinaryRRE<"dxr", 0xB22D, null_frag, FP128, FP128>; -def DE : BinaryRX <"de", 0x7D, null_frag, FP32, load, 4>; -def DD : BinaryRX <"dd", 0x6D, null_frag, FP64, load, 8>; +def DE : BinaryRX <"de", 0x7D, null_frag, FP32, z_load, 4>; +def DD : BinaryRX <"dd", 0x6D, null_frag, FP64, z_load, 8>; //===----------------------------------------------------------------------===// @@ -233,7 +233,7 @@ let Defs = [CC] in { def CDR : CompareRR <"cdr", 0x29, null_frag, FP64, FP64>; def CXR : CompareRRE<"cxr", 0xB369, null_frag, FP128, FP128>; - def CE : CompareRX<"ce", 0x79, null_frag, FP32, load, 4>; - def CD : CompareRX<"cd", 0x69, null_frag, FP64, load, 8>; + def CE : CompareRX<"ce", 0x79, null_frag, FP32, z_load, 4>; + def CD : CompareRX<"cd", 0x69, null_frag, FP64, z_load, 8>; } diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 937e36057a6ed..96ea65b6c3d88 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -306,7 +306,7 @@ let Predicates = [IsTargetXPLINK64] in { let mayLoad = 1, AddedComplexity = 20, hasNoSchedulingInfo = 1, Defs = [CC] in { def ADA_ENTRY_VALUE : Alias<12, (outs GR64:$Reg), (ins adasym:$addr, ADDR64:$ADA, imm64:$Offset), - [(set i64:$Reg, (load (z_ada_entry + [(set i64:$Reg, (z_load (z_ada_entry iPTR:$addr, iPTR:$ADA, i64:$Offset)))]>; } } @@ -468,12 +468,12 @@ let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in { // Register loads. let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1 in { // Expands to L, LY or LFH, depending on the choice of register. - def LMux : UnaryRXYPseudo<"l", load, GRX32, 4>, + def LMux : UnaryRXYPseudo<"l", z_load, GRX32, 4>, Requires<[FeatureHighWord]>; - defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32, 4>; - def LFH : UnaryRXY<"lfh", 0xE3CA, load, GRH32, 4>, + defm L : UnaryRXPair<"l", 0x58, 0xE358, z_load, GR32, 4>; + def LFH : UnaryRXY<"lfh", 0xE3CA, z_load, GRH32, 4>, Requires<[FeatureHighWord]>; - def LG : UnaryRXY<"lg", 0xE304, load, GR64, 8>; + def LG : UnaryRXY<"lg", 0xE304, z_load, GR64, 8>; // These instructions are split after register allocation, so we don't // want a custom inserter. @@ -483,22 +483,22 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1 in { } } let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { - def LT : UnaryRXY<"lt", 0xE312, load, GR32, 4>; - def LTG : UnaryRXY<"ltg", 0xE302, load, GR64, 8>; + def LT : UnaryRXY<"lt", 0xE312, z_load, GR32, 4>; + def LTG : UnaryRXY<"ltg", 0xE302, z_load, GR64, 8>; } let canFoldAsLoad = 1 in { - def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>; - def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>; + def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_z_load, GR32>; + def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_z_load, GR64>; } // Load and zero rightmost byte. 
let Predicates = [FeatureLoadAndZeroRightmostByte] in { def LZRF : UnaryRXY<"lzrf", 0xE33B, null_frag, GR32, 4>; def LZRG : UnaryRXY<"lzrg", 0xE32A, null_frag, GR64, 8>; - def : Pat<(and (i32 (load bdxaddr20only:$src)), 0xffffff00), + def : Pat<(and (i32 (z_load bdxaddr20only:$src)), 0xffffff00), (LZRF bdxaddr20only:$src)>; - def : Pat<(and (i64 (load bdxaddr20only:$src)), 0xffffffffffffff00), + def : Pat<(and (i64 (z_load bdxaddr20only:$src)), 0xffffffffffffff00), (LZRG bdxaddr20only:$src)>; } @@ -689,29 +689,29 @@ def : Pat<(sext_inreg GR64:$src, i32), // 32-bit extensions from 8-bit memory. LBMux expands to LB or LBH, // depending on the choice of register. -def LBMux : UnaryRXYPseudo<"lb", asextloadi8, GRX32, 1>, +def LBMux : UnaryRXYPseudo<"lb", z_asextloadi8, GRX32, 1>, Requires<[FeatureHighWord]>; -def LB : UnaryRXY<"lb", 0xE376, asextloadi8, GR32, 1>; -def LBH : UnaryRXY<"lbh", 0xE3C0, asextloadi8, GRH32, 1>, +def LB : UnaryRXY<"lb", 0xE376, z_asextloadi8, GR32, 1>; +def LBH : UnaryRXY<"lbh", 0xE3C0, z_asextloadi8, GRH32, 1>, Requires<[FeatureHighWord]>; // 32-bit extensions from 16-bit memory. LHMux expands to LH or LHH, // depending on the choice of register. -def LHMux : UnaryRXYPseudo<"lh", asextloadi16, GRX32, 2>, +def LHMux : UnaryRXYPseudo<"lh", z_asextloadi16, GRX32, 2>, Requires<[FeatureHighWord]>; -defm LH : UnaryRXPair<"lh", 0x48, 0xE378, asextloadi16, GR32, 2>; -def LHH : UnaryRXY<"lhh", 0xE3C4, asextloadi16, GRH32, 2>, +defm LH : UnaryRXPair<"lh", 0x48, 0xE378, z_asextloadi16, GR32, 2>; +def LHH : UnaryRXY<"lhh", 0xE3C4, z_asextloadi16, GRH32, 2>, Requires<[FeatureHighWord]>; -def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_asextloadi16, GR32>; +def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_z_asextloadi16, GR32>; // 64-bit extensions from memory. -def LGB : UnaryRXY<"lgb", 0xE377, asextloadi8, GR64, 1>; -def LGH : UnaryRXY<"lgh", 0xE315, asextloadi16, GR64, 2>; -def LGF : UnaryRXY<"lgf", 0xE314, asextloadi32, GR64, 4>; -def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_asextloadi16, GR64>; -def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_asextloadi32, GR64>; +def LGB : UnaryRXY<"lgb", 0xE377, z_asextloadi8, GR64, 1>; +def LGH : UnaryRXY<"lgh", 0xE315, z_asextloadi16, GR64, 2>; +def LGF : UnaryRXY<"lgf", 0xE314, z_asextloadi32, GR64, 4>; +def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_z_asextloadi16, GR64>; +def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_z_asextloadi32, GR64>; let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in - def LTGF : UnaryRXY<"ltgf", 0xE332, asextloadi32, GR64, 4>; + def LTGF : UnaryRXY<"ltgf", 0xE332, z_asextloadi32, GR64, 4>; //===----------------------------------------------------------------------===// // Zero extensions @@ -740,40 +740,40 @@ def : Pat<(and GR64:$src, 0xffffffff), // 32-bit extensions from 8-bit memory. LLCMux expands to LLC or LLCH, // depending on the choice of register. -def LLCMux : UnaryRXYPseudo<"llc", azextloadi8, GRX32, 1>, +def LLCMux : UnaryRXYPseudo<"llc", z_azextloadi8, GRX32, 1>, Requires<[FeatureHighWord]>; -def LLC : UnaryRXY<"llc", 0xE394, azextloadi8, GR32, 1>; -def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GRH32, 1>, +def LLC : UnaryRXY<"llc", 0xE394, z_azextloadi8, GR32, 1>; +def LLCH : UnaryRXY<"llch", 0xE3C2, z_azextloadi8, GRH32, 1>, Requires<[FeatureHighWord]>; // 32-bit extensions from 16-bit memory. LLHMux expands to LLH or LLHH, // depending on the choice of register. 
-def LLHMux : UnaryRXYPseudo<"llh", azextloadi16, GRX32, 2>, +def LLHMux : UnaryRXYPseudo<"llh", z_azextloadi16, GRX32, 2>, Requires<[FeatureHighWord]>; -def LLH : UnaryRXY<"llh", 0xE395, azextloadi16, GR32, 2>; -def LLHH : UnaryRXY<"llhh", 0xE3C6, azextloadi16, GRH32, 2>, +def LLH : UnaryRXY<"llh", 0xE395, z_azextloadi16, GR32, 2>; +def LLHH : UnaryRXY<"llhh", 0xE3C6, z_azextloadi16, GRH32, 2>, Requires<[FeatureHighWord]>; -def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_azextloadi16, GR32>; +def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_z_azextloadi16, GR32>; // 64-bit extensions from memory. -def LLGC : UnaryRXY<"llgc", 0xE390, azextloadi8, GR64, 1>; -def LLGH : UnaryRXY<"llgh", 0xE391, azextloadi16, GR64, 2>; -def LLGF : UnaryRXY<"llgf", 0xE316, azextloadi32, GR64, 4>; -def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_azextloadi16, GR64>; -def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_azextloadi32, GR64>; +def LLGC : UnaryRXY<"llgc", 0xE390, z_azextloadi8, GR64, 1>; +def LLGH : UnaryRXY<"llgh", 0xE391, z_azextloadi16, GR64, 2>; +def LLGF : UnaryRXY<"llgf", 0xE316, z_azextloadi32, GR64, 4>; +def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_z_azextloadi16, GR64>; +def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_z_azextloadi32, GR64>; // 31-to-64-bit zero extensions. def LLGTR : UnaryRRE<"llgtr", 0xB917, null_frag, GR64, GR64>; def LLGT : UnaryRXY<"llgt", 0xE317, null_frag, GR64, 4>; def : Pat<(and GR64:$src, 0x7fffffff), (LLGTR GR64:$src)>; -def : Pat<(and (i64 (azextloadi32 bdxaddr20only:$src)), 0x7fffffff), +def : Pat<(and (i64 (z_azextloadi32 bdxaddr20only:$src)), 0x7fffffff), (LLGT bdxaddr20only:$src)>; // Load and zero rightmost byte. let Predicates = [FeatureLoadAndZeroRightmostByte] in { def LLZRGF : UnaryRXY<"llzrgf", 0xE33A, null_frag, GR64, 4>; - def : Pat<(and (i64 (azextloadi32 bdxaddr20only:$src)), 0xffffff00), + def : Pat<(and (i64 (z_azextloadi32 bdxaddr20only:$src)), 0xffffff00), (LLZRGF bdxaddr20only:$src)>; } @@ -930,14 +930,14 @@ defm : SXU; //===----------------------------------------------------------------------===// let isCodeGenOnly = 1 in - defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, azextloadi8, 1>; -defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, azextloadi8, 1>; + defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, z_azextloadi8, 1>; +defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, z_azextloadi8, 1>; -defm : InsertMem<"inserti8", IC32, GR32, azextloadi8, bdxaddr12pair>; -defm : InsertMem<"inserti8", IC32Y, GR32, azextloadi8, bdxaddr20pair>; +defm : InsertMem<"inserti8", IC32, GR32, z_azextloadi8, bdxaddr12pair>; +defm : InsertMem<"inserti8", IC32Y, GR32, z_azextloadi8, bdxaddr20pair>; -defm : InsertMem<"inserti8", IC, GR64, azextloadi8, bdxaddr12pair>; -defm : InsertMem<"inserti8", ICY, GR64, azextloadi8, bdxaddr20pair>; +defm : InsertMem<"inserti8", IC, GR64, z_azextloadi8, bdxaddr12pair>; +defm : InsertMem<"inserti8", ICY, GR64, z_azextloadi8, bdxaddr20pair>; // Insert characters under mask -- not (yet) used for codegen. let Defs = [CC] in { @@ -1015,12 +1015,12 @@ let Defs = [CC], CCValues = 0xF, CCIfNoSignedWrap = 1 in { def AGFI : BinaryRIL<"agfi", 0xC28, z_sadd, GR64, imm64sx32>; // Addition of memory. 
- defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, z_sadd, GR32, asextloadi16, 2>; - defm A : BinaryRXPairAndPseudo<"a", 0x5A, 0xE35A, z_sadd, GR32, load, 4>; - def AGH : BinaryRXY<"agh", 0xE338, z_sadd, GR64, asextloadi16, 2>, + defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, z_sadd, GR32, z_asextloadi16, 2>; + defm A : BinaryRXPairAndPseudo<"a", 0x5A, 0xE35A, z_sadd, GR32, z_load, 4>; + def AGH : BinaryRXY<"agh", 0xE338, z_sadd, GR64, z_asextloadi16, 2>, Requires<[FeatureMiscellaneousExtensions2]>; - def AGF : BinaryRXY<"agf", 0xE318, z_sadd, GR64, asextloadi32, 4>; - defm AG : BinaryRXYAndPseudo<"ag", 0xE308, z_sadd, GR64, load, 8>; + def AGF : BinaryRXY<"agf", 0xE318, z_sadd, GR64, z_asextloadi32, 4>; + defm AG : BinaryRXYAndPseudo<"ag", 0xE308, z_sadd, GR64, z_load, 8>; // Addition to memory. def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>; @@ -1058,9 +1058,9 @@ let Defs = [CC], CCValues = 0xF, IsLogical = 1 in { Requires<[FeatureHighWord]>; // Addition of memory. - defm AL : BinaryRXPairAndPseudo<"al", 0x5E, 0xE35E, z_uadd, GR32, load, 4>; - def ALGF : BinaryRXY<"algf", 0xE31A, z_uadd, GR64, azextloadi32, 4>; - defm ALG : BinaryRXYAndPseudo<"alg", 0xE30A, z_uadd, GR64, load, 8>; + defm AL : BinaryRXPairAndPseudo<"al", 0x5E, 0xE35E, z_uadd, GR32, z_load, 4>; + def ALGF : BinaryRXY<"algf", 0xE31A, z_uadd, GR64, z_azextloadi32, 4>; + defm ALG : BinaryRXYAndPseudo<"alg", 0xE30A, z_uadd, GR64, z_load, 8>; // Addition to memory. def ALSI : BinarySIY<"alsi", 0xEB6E, null_frag, imm32sx8>; @@ -1075,8 +1075,8 @@ let Defs = [CC], Uses = [CC], CCValues = 0xF, IsLogical = 1 in { def ALCGR : BinaryRRE<"alcgr", 0xB988, z_addcarry, GR64, GR64>; // Addition of memory. - def ALC : BinaryRXY<"alc", 0xE398, z_addcarry, GR32, load, 4>; - def ALCG : BinaryRXY<"alcg", 0xE388, z_addcarry, GR64, load, 8>; + def ALC : BinaryRXY<"alc", 0xE398, z_addcarry, GR32, z_load, 4>; + def ALCG : BinaryRXY<"alcg", 0xE388, z_addcarry, GR64, z_load, 8>; } // Addition that does not modify the condition code. @@ -1103,12 +1103,12 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8, Requires<[FeatureHighWord]>; // Subtraction of memory. - defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, z_ssub, GR32, asextloadi16, 2>; - defm S : BinaryRXPairAndPseudo<"s", 0x5B, 0xE35B, z_ssub, GR32, load, 4>; - def SGH : BinaryRXY<"sgh", 0xE339, z_ssub, GR64, asextloadi16, 2>, + defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, z_ssub, GR32, z_asextloadi16, 2>; + defm S : BinaryRXPairAndPseudo<"s", 0x5B, 0xE35B, z_ssub, GR32, z_load, 4>; + def SGH : BinaryRXY<"sgh", 0xE339, z_ssub, GR64, z_asextloadi16, 2>, Requires<[FeatureMiscellaneousExtensions2]>; - def SGF : BinaryRXY<"sgf", 0xE319, z_ssub, GR64, asextloadi32, 4>; - defm SG : BinaryRXYAndPseudo<"sg", 0xE309, z_ssub, GR64, load, 8>; + def SGF : BinaryRXY<"sgf", 0xE319, z_ssub, GR64, z_asextloadi32, 4>; + defm SG : BinaryRXYAndPseudo<"sg", 0xE309, z_ssub, GR64, z_load, 8>; } defm : SXB; @@ -1156,9 +1156,9 @@ let Defs = [CC], CCValues = 0x7, IsLogical = 1 in { def SLGFI : BinaryRIL<"slgfi", 0xC24, z_usub, GR64, imm64zx32>; // Subtraction of memory. 
- defm SL : BinaryRXPairAndPseudo<"sl", 0x5F, 0xE35F, z_usub, GR32, load, 4>; - def SLGF : BinaryRXY<"slgf", 0xE31B, z_usub, GR64, azextloadi32, 4>; - defm SLG : BinaryRXYAndPseudo<"slg", 0xE30B, z_usub, GR64, load, 8>; + defm SL : BinaryRXPairAndPseudo<"sl", 0x5F, 0xE35F, z_usub, GR32, z_load, 4>; + def SLGF : BinaryRXY<"slgf", 0xE31B, z_usub, GR64, z_azextloadi32, 4>; + defm SLG : BinaryRXYAndPseudo<"slg", 0xE30B, z_usub, GR64, z_load, 8>; } defm : ZXB; @@ -1183,8 +1183,8 @@ let Defs = [CC], Uses = [CC], CCValues = 0xF, IsLogical = 1 in { def SLBGR : BinaryRRE<"slbgr", 0xB989, z_subcarry, GR64, GR64>; // Subtraction of memory. - def SLB : BinaryRXY<"slb", 0xE399, z_subcarry, GR32, load, 4>; - def SLBG : BinaryRXY<"slbg", 0xE389, z_subcarry, GR64, load, 8>; + def SLB : BinaryRXY<"slb", 0xE399, z_subcarry, GR32, z_load, 4>; + def SLBG : BinaryRXY<"slbg", 0xE389, z_subcarry, GR64, z_load, 8>; } @@ -1233,8 +1233,8 @@ let Defs = [CC] in { // ANDs of memory. let CCValues = 0xC, CompareZeroCCMask = 0x8 in { - defm N : BinaryRXPairAndPseudo<"n", 0x54, 0xE354, and, GR32, load, 4>; - defm NG : BinaryRXYAndPseudo<"ng", 0xE380, and, GR64, load, 8>; + defm N : BinaryRXPairAndPseudo<"n", 0x54, 0xE354, and, GR32, z_load, 4>; + defm NG : BinaryRXYAndPseudo<"ng", 0xE380, and, GR64, z_load, 8>; } // AND to memory @@ -1290,8 +1290,8 @@ let Defs = [CC] in { // ORs of memory. let CCValues = 0xC, CompareZeroCCMask = 0x8 in { - defm O : BinaryRXPairAndPseudo<"o", 0x56, 0xE356, or, GR32, load, 4>; - defm OG : BinaryRXYAndPseudo<"og", 0xE381, or, GR64, load, 8>; + defm O : BinaryRXPairAndPseudo<"o", 0x56, 0xE356, or, GR32, z_load, 4>; + defm OG : BinaryRXYAndPseudo<"og", 0xE381, or, GR64, z_load, 8>; } // OR to memory @@ -1330,8 +1330,8 @@ let Defs = [CC] in { // XORs of memory. let CCValues = 0xC, CompareZeroCCMask = 0x8 in { - defm X : BinaryRXPairAndPseudo<"x",0x57, 0xE357, xor, GR32, load, 4>; - defm XG : BinaryRXYAndPseudo<"xg", 0xE382, xor, GR64, load, 8>; + defm X : BinaryRXPairAndPseudo<"x",0x57, 0xE357, xor, GR32, z_load, 4>; + defm XG : BinaryRXYAndPseudo<"xg", 0xE382, xor, GR64, z_load, 8>; } // XOR to memory @@ -1411,17 +1411,17 @@ def MSFI : BinaryRIL<"msfi", 0xC21, mul, GR32, simm32>; def MSGFI : BinaryRIL<"msgfi", 0xC20, mul, GR64, imm64sx32>; // Multiplication of memory. -defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, asextloadi16, 2>; -defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load, 4>; -def MGH : BinaryRXY<"mgh", 0xE33C, mul, GR64, asextloadi16, 2>, +defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, z_asextloadi16, 2>; +defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, z_load, 4>; +def MGH : BinaryRXY<"mgh", 0xE33C, mul, GR64, z_asextloadi16, 2>, Requires<[FeatureMiscellaneousExtensions2]>; -def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, asextloadi32, 4>; -def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>; +def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, z_asextloadi32, 4>; +def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, z_load, 8>; // Multiplication of memory, setting the condition code. let Predicates = [FeatureMiscellaneousExtensions2], Defs = [CC] in { - defm MSC : BinaryRXYAndPseudo<"msc", 0xE353, null_frag, GR32, load, 4>; - defm MSGC : BinaryRXYAndPseudo<"msgc", 0xE383, null_frag, GR64, load, 8>; + defm MSC : BinaryRXYAndPseudo<"msc", 0xE353, null_frag, GR32, z_load, 4>; + defm MSGC : BinaryRXYAndPseudo<"msgc", 0xE383, null_frag, GR64, z_load, 8>; } // Multiplication of a register, producing two results. 
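(Annotation, not part of the patch: as a crib while reading these renames, the sketch below summarizes which extension kinds each fragment family accepts, matching the PatFrag bodies defined later in SystemZOperators.td; the helper itself is purely illustrative.)

    // ETy is the extension kind reported for the (ordinary or atomic) load node.
    static bool acceptedByFamily(StringRef Family, ISD::LoadExtType ETy) {
      if (Family == "z_load")        // plain or non-extending atomic load
        return ETy == ISD::NON_EXTLOAD;
      if (Family == "z_sextload")    // sign-extending only
        return ETy == ISD::SEXTLOAD;
      if (Family == "z_zextload")    // zero-extending only
        return ETy == ISD::ZEXTLOAD;
      if (Family == "z_asextload")   // any- or sign-extending
        return ETy == ISD::EXTLOAD || ETy == ISD::SEXTLOAD;
      if (Family == "z_azextload")   // any- or zero-extending
        return ETy == ISD::EXTLOAD || ETy == ISD::ZEXTLOAD;
      if (Family == "z_anyextload")  // extending in any way
        return ETy != ISD::NON_EXTLOAD;
      if (Family == "z_extload")     // any-extending only
        return ETy == ISD::EXTLOAD;
      return false;
    }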
@@ -1437,16 +1437,16 @@ def : Pat<(z_umul_lohi GR64:$src1, GR64:$src2), (MLGR (AEXT128 GR64:$src1), GR64:$src2)>; // Multiplication of memory, producing two results. -def M : BinaryRX <"m", 0x5C, null_frag, GR128, load, 4>; -def MFY : BinaryRXY<"mfy", 0xE35C, null_frag, GR128, load, 4>; -def MG : BinaryRXY<"mg", 0xE384, null_frag, GR128, load, 8>, +def M : BinaryRX <"m", 0x5C, null_frag, GR128, z_load, 4>; +def MFY : BinaryRXY<"mfy", 0xE35C, null_frag, GR128, z_load, 4>; +def MG : BinaryRXY<"mg", 0xE384, null_frag, GR128, z_load, 8>, Requires<[FeatureMiscellaneousExtensions2]>; -def ML : BinaryRXY<"ml", 0xE396, null_frag, GR128, load, 4>; -def MLG : BinaryRXY<"mlg", 0xE386, null_frag, GR128, load, 8>; +def ML : BinaryRXY<"ml", 0xE396, null_frag, GR128, z_load, 4>; +def MLG : BinaryRXY<"mlg", 0xE386, null_frag, GR128, z_load, 8>; -def : Pat<(z_smul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))), +def : Pat<(z_smul_lohi GR64:$src1, (i64 (z_load bdxaddr20only:$src2))), (MG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; -def : Pat<(z_umul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))), +def : Pat<(z_umul_lohi GR64:$src1, (i64 (z_load bdxaddr20only:$src2))), (MLG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; //===----------------------------------------------------------------------===// @@ -1462,30 +1462,30 @@ let hasSideEffects = 1 in { // Do not speculatively execute. def DLGR : BinaryRRE<"dlgr", 0xB987, null_frag, GR128, GR64>; // Division and remainder, from memory. - def D : BinaryRX <"d", 0x5D, null_frag, GR128, load, 4>; - def DSGF : BinaryRXY<"dsgf", 0xE31D, null_frag, GR128, load, 4>; - def DSG : BinaryRXY<"dsg", 0xE30D, null_frag, GR128, load, 8>; - def DL : BinaryRXY<"dl", 0xE397, null_frag, GR128, load, 4>; - def DLG : BinaryRXY<"dlg", 0xE387, null_frag, GR128, load, 8>; + def D : BinaryRX <"d", 0x5D, null_frag, GR128, z_load, 4>; + def DSGF : BinaryRXY<"dsgf", 0xE31D, null_frag, GR128, z_load, 4>; + def DSG : BinaryRXY<"dsg", 0xE30D, null_frag, GR128, z_load, 8>; + def DL : BinaryRXY<"dl", 0xE397, null_frag, GR128, z_load, 4>; + def DLG : BinaryRXY<"dlg", 0xE387, null_frag, GR128, z_load, 8>; } def : Pat<(z_sdivrem GR64:$src1, GR32:$src2), (DSGFR (AEXT128 GR64:$src1), GR32:$src2)>; -def : Pat<(z_sdivrem GR64:$src1, (i32 (load bdxaddr20only:$src2))), +def : Pat<(z_sdivrem GR64:$src1, (i32 (z_load bdxaddr20only:$src2))), (DSGF (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; def : Pat<(z_sdivrem GR64:$src1, GR64:$src2), (DSGR (AEXT128 GR64:$src1), GR64:$src2)>; -def : Pat<(z_sdivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))), +def : Pat<(z_sdivrem GR64:$src1, (i64 (z_load bdxaddr20only:$src2))), (DSG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; def : Pat<(z_udivrem GR32:$src1, GR32:$src2), (DLR (ZEXT128 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src1, subreg_l32)), GR32:$src2)>; -def : Pat<(z_udivrem GR32:$src1, (i32 (load bdxaddr20only:$src2))), +def : Pat<(z_udivrem GR32:$src1, (i32 (z_load bdxaddr20only:$src2))), (DL (ZEXT128 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src1, subreg_l32)), bdxaddr20only:$src2)>; def : Pat<(z_udivrem GR64:$src1, GR64:$src2), (DLGR (ZEXT128 GR64:$src1), GR64:$src2)>; -def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))), +def : Pat<(z_udivrem GR64:$src1, (i64 (z_load bdxaddr20only:$src2))), (DLG (ZEXT128 GR64:$src1), bdxaddr20only:$src2)>; //===----------------------------------------------------------------------===// @@ -1591,25 +1591,25 @@ let Defs = [CC], CCValues = 0xE in { def CGFI : CompareRIL<"cgfi", 0xC2C, z_scmp, GR64, 
imm64sx32>; // Comparison with memory. - defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_scmp, GR32, asextloadi16, 2>; - def CMux : CompareRXYPseudo, + defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_scmp, GR32, z_asextloadi16, 2>; + def CMux : CompareRXYPseudo, Requires<[FeatureHighWord]>; - defm C : CompareRXPair<"c", 0x59, 0xE359, z_scmp, GR32, load, 4>; - def CHF : CompareRXY<"chf", 0xE3CD, z_scmp, GRH32, load, 4>, + defm C : CompareRXPair<"c", 0x59, 0xE359, z_scmp, GR32, z_load, 4>; + def CHF : CompareRXY<"chf", 0xE3CD, z_scmp, GRH32, z_load, 4>, Requires<[FeatureHighWord]>; - def CGH : CompareRXY<"cgh", 0xE334, z_scmp, GR64, asextloadi16, 2>; - def CGF : CompareRXY<"cgf", 0xE330, z_scmp, GR64, asextloadi32, 4>; - def CG : CompareRXY<"cg", 0xE320, z_scmp, GR64, load, 8>; - def CHRL : CompareRILPC<"chrl", 0xC65, z_scmp, GR32, aligned_asextloadi16>; - def CRL : CompareRILPC<"crl", 0xC6D, z_scmp, GR32, aligned_load>; - def CGHRL : CompareRILPC<"cghrl", 0xC64, z_scmp, GR64, aligned_asextloadi16>; - def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_scmp, GR64, aligned_asextloadi32>; - def CGRL : CompareRILPC<"cgrl", 0xC68, z_scmp, GR64, aligned_load>; + def CGH : CompareRXY<"cgh", 0xE334, z_scmp, GR64, z_asextloadi16, 2>; + def CGF : CompareRXY<"cgf", 0xE330, z_scmp, GR64, z_asextloadi32, 4>; + def CG : CompareRXY<"cg", 0xE320, z_scmp, GR64, z_load, 8>; + def CHRL : CompareRILPC<"chrl", 0xC65, z_scmp, GR32, aligned_z_asextloadi16>; + def CRL : CompareRILPC<"crl", 0xC6D, z_scmp, GR32, aligned_z_load>; + def CGHRL : CompareRILPC<"cghrl", 0xC64, z_scmp, GR64, aligned_z_asextloadi16>; + def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_scmp, GR64, aligned_z_asextloadi32>; + def CGRL : CompareRILPC<"cgrl", 0xC68, z_scmp, GR64, aligned_z_load>; // Comparison between memory and a signed 16-bit immediate. - def CHHSI : CompareSIL<"chhsi", 0xE554, z_scmp, asextloadi16, imm32sx16>; - def CHSI : CompareSIL<"chsi", 0xE55C, z_scmp, load, imm32sx16>; - def CGHSI : CompareSIL<"cghsi", 0xE558, z_scmp, load, imm64sx16>; + def CHHSI : CompareSIL<"chhsi", 0xE554, z_scmp, z_asextloadi16, imm32sx16>; + def CHSI : CompareSIL<"chsi", 0xE55C, z_scmp, z_load, imm32sx16>; + def CGHSI : CompareSIL<"cghsi", 0xE558, z_scmp, z_load, imm64sx16>; } defm : SXB; @@ -1636,31 +1636,31 @@ let Defs = [CC], CCValues = 0xE, IsLogical = 1 in { def CLGFI : CompareRIL<"clgfi", 0xC2E, z_ucmp, GR64, imm64zx32>; // Comparison with memory. 
- def CLMux : CompareRXYPseudo, + def CLMux : CompareRXYPseudo, Requires<[FeatureHighWord]>; - defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, load, 4>; - def CLHF : CompareRXY<"clhf", 0xE3CF, z_ucmp, GRH32, load, 4>, + defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, z_load, 4>; + def CLHF : CompareRXY<"clhf", 0xE3CF, z_ucmp, GRH32, z_load, 4>, Requires<[FeatureHighWord]>; - def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, azextloadi32, 4>; - def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, load, 8>; + def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, z_azextloadi32, 4>; + def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, z_load, 8>; def CLHRL : CompareRILPC<"clhrl", 0xC67, z_ucmp, GR32, - aligned_azextloadi16>; + aligned_z_azextloadi16>; def CLRL : CompareRILPC<"clrl", 0xC6F, z_ucmp, GR32, - aligned_load>; + aligned_z_load>; def CLGHRL : CompareRILPC<"clghrl", 0xC66, z_ucmp, GR64, - aligned_azextloadi16>; + aligned_z_azextloadi16>; def CLGFRL : CompareRILPC<"clgfrl", 0xC6E, z_ucmp, GR64, - aligned_azextloadi32>; + aligned_z_azextloadi32>; def CLGRL : CompareRILPC<"clgrl", 0xC6A, z_ucmp, GR64, - aligned_load>; + aligned_z_load>; // Comparison between memory and an unsigned 8-bit immediate. - defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, azextloadi8, imm32zx8>; + defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, z_azextloadi8, imm32zx8>; // Comparison between memory and an unsigned 16-bit immediate. - def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, azextloadi16, imm32zx16>; - def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, load, imm32zx16>; - def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, load, imm64zx16>; + def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, z_azextloadi16, imm32zx16>; + def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, z_load, imm32zx16>; + def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, z_load, imm64zx16>; } defm : ZXB; @@ -1693,7 +1693,7 @@ let Defs = [CC] in { def TMHL64 : CompareAliasRI; def TMHH64 : CompareAliasRI; - defm TM : CompareSIPair<"tm", 0x91, 0xEB51, z_tm_mem, anyextloadi8, imm32zx8>; + defm TM : CompareSIPair<"tm", 0x91, 0xEB51, z_tm_mem, z_anyextloadi8, imm32zx8>; } def TML : InstAlias<"tml\t$R, $I", (TMLL GR32:$R, imm32ll16:$I), 0>; @@ -1914,8 +1914,8 @@ let Predicates = [FeatureGuardedStorage], hasSideEffects = 1 in { // Decimal arithmetic //===----------------------------------------------------------------------===// -defm CVB : BinaryRXPair<"cvb",0x4F, 0xE306, null_frag, GR32, load, 4>; -def CVBG : BinaryRXY<"cvbg", 0xE30E, null_frag, GR64, load, 8>; +defm CVB : BinaryRXPair<"cvb",0x4F, 0xE306, null_frag, GR32, z_load, 4>; +def CVBG : BinaryRXY<"cvbg", 0xE30E, null_frag, GR64, z_load, 8>; defm CVD : StoreRXPair<"cvd", 0x4E, 0xE326, null_frag, GR32, 4>; def CVDG : StoreRXY<"cvdg", 0xE32E, null_frag, GR64, 8>; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td index 799b27d74414d..245e3c3399a98 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -140,8 +140,8 @@ let Predicates = [FeatureVector] in { // to use those instructions rather than force a 20-bit displacement // into a GPR temporary. let mayLoad = 1 in { - def VL32 : UnaryAliasVRX; - def VL64 : UnaryAliasVRX; + def VL32 : UnaryAliasVRX; + def VL64 : UnaryAliasVRX; } // Load logical element and zero. 
@@ -198,12 +198,12 @@ multiclass ReplicatePeephole; } -defm : ReplicatePeephole; -defm : ReplicatePeephole; -defm : ReplicatePeephole; -defm : ReplicatePeephole; -defm : ReplicatePeephole; -defm : ReplicatePeephole; +defm : ReplicatePeephole; +defm : ReplicatePeephole; +defm : ReplicatePeephole; +defm : ReplicatePeephole; +defm : ReplicatePeephole; +defm : ReplicatePeephole; //===----------------------------------------------------------------------===// // Stores @@ -1561,13 +1561,13 @@ let Predicates = [FeatureVector] in { // Any-extending loads into i128. let Predicates = [FeatureVector] in { - def : Pat<(i128 (extloadi8 bdxaddr12only:$addr)), + def : Pat<(i128 (z_extloadi8 bdxaddr12only:$addr)), (VLREPB bdxaddr12only:$addr)>; - def : Pat<(i128 (extloadi16 bdxaddr12only:$addr)), + def : Pat<(i128 (z_extloadi16 bdxaddr12only:$addr)), (VLREPH bdxaddr12only:$addr)>; - def : Pat<(i128 (extloadi32 bdxaddr12only:$addr)), + def : Pat<(i128 (z_extloadi32 bdxaddr12only:$addr)), (VLREPF bdxaddr12only:$addr)>; - def : Pat<(i128 (extloadi64 bdxaddr12only:$addr)), + def : Pat<(i128 (z_extloadi64 bdxaddr12only:$addr)), (VLREPG bdxaddr12only:$addr)>; } @@ -1621,13 +1621,13 @@ let Predicates = [FeatureVector] in { // Zero-extending loads into i128. let Predicates = [FeatureVector] in { - def : Pat<(i128 (zextloadi8 bdxaddr12only:$addr)), + def : Pat<(i128 (z_zextloadi8 bdxaddr12only:$addr)), (VLEB (VGBM 0), bdxaddr12only:$addr, 15)>; - def : Pat<(i128 (zextloadi16 bdxaddr12only:$addr)), + def : Pat<(i128 (z_zextloadi16 bdxaddr12only:$addr)), (VLEH (VGBM 0), bdxaddr12only:$addr, 7)>; - def : Pat<(i128 (zextloadi32 bdxaddr12only:$addr)), + def : Pat<(i128 (z_zextloadi32 bdxaddr12only:$addr)), (VLEF (VGBM 0), bdxaddr12only:$addr, 3)>; - def : Pat<(i128 (zextloadi64 bdxaddr12only:$addr)), + def : Pat<(i128 (z_zextloadi64 bdxaddr12only:$addr)), (VLEG (VGBM 0), bdxaddr12only:$addr, 1)>; } @@ -1663,13 +1663,13 @@ let Predicates = [FeatureVector] in { // Sign-extending loads into i128. let Predicates = [FeatureVector] in { - def : Pat<(i128 (sextloadi8 bdxaddr12only:$addr)), + def : Pat<(i128 (z_sextloadi8 bdxaddr12only:$addr)), (VSRAB (VLREPB bdxaddr12only:$addr), (VREPIB 120))>; - def : Pat<(i128 (sextloadi16 bdxaddr12only:$addr)), + def : Pat<(i128 (z_sextloadi16 bdxaddr12only:$addr)), (VSRAB (VLREPH bdxaddr12only:$addr), (VREPIB 112))>; - def : Pat<(i128 (sextloadi32 bdxaddr12only:$addr)), + def : Pat<(i128 (z_sextloadi32 bdxaddr12only:$addr)), (VSRAB (VLREPF bdxaddr12only:$addr), (VREPIB 96))>; - def : Pat<(i128 (sextloadi64 bdxaddr12only:$addr)), + def : Pat<(i128 (z_sextloadi64 bdxaddr12only:$addr)), (VSRAB (VLREPG bdxaddr12only:$addr), (VREPIB 64))>; } diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td index d98bb886c1850..6c4e33a6aa7fa 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -534,37 +534,108 @@ def zext8 : PatFrag<(ops node:$src), (and node:$src, 0xff)>; def zext16 : PatFrag<(ops node:$src), (and node:$src, 0xffff)>; def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>; -// Extending loads in which the extension type can be signed. -def asextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ - unsigned Type = cast(N)->getExtensionType(); - return Type == ISD::EXTLOAD || Type == ISD::SEXTLOAD; +// Match a load or a non-extending atomic load. 
+def z_load : PatFrags<(ops node:$ptr),
+                      [(load node:$ptr),
+                       (atomic_load node:$ptr)], [{
+  if (auto *AL = dyn_cast<AtomicSDNode>(N))
+    if (AL->getExtensionType() != ISD::NON_EXTLOAD)
+      return false;
+  return true;
 }]>;
-def asextloadi8 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
-  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+
+// Sign extending (atomic) loads.
+def z_sextload : PatFrags<(ops node:$ptr),
+                          [(unindexedload node:$ptr),
+                           (atomic_load node:$ptr)], [{
+  return getLoadExtType(N) == ISD::SEXTLOAD;
 }]>;
-def asextloadi16 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
-  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+def z_sextloadi8 : PatFrag<(ops node:$ptr), (z_sextload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i8;
 }]>;
-def asextloadi32 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
-  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+def z_sextloadi16 : PatFrag<(ops node:$ptr), (z_sextload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def z_sextloadi32 : PatFrag<(ops node:$ptr), (z_sextload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def z_sextloadi64 : PatFrag<(ops node:$ptr), (z_sextload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i64;
 }]>;
-// Extending loads in which the extension type can be unsigned.
-def azextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
-  unsigned Type = cast<LoadSDNode>(N)->getExtensionType();
-  return Type == ISD::EXTLOAD || Type == ISD::ZEXTLOAD;
+// Zero extending (atomic) loads.
+def z_zextload : PatFrags<(ops node:$ptr),
+                          [(unindexedload node:$ptr),
+                           (atomic_load node:$ptr)], [{
+  return getLoadExtType(N) == ISD::ZEXTLOAD;
 }]>;
-def azextloadi8 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
-  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+def z_zextloadi8 : PatFrag<(ops node:$ptr), (z_zextload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i8;
 }]>;
-def azextloadi16 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
-  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+def z_zextloadi16 : PatFrag<(ops node:$ptr), (z_zextload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i16;
 }]>;
-def azextloadi32 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
-  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+def z_zextloadi32 : PatFrag<(ops node:$ptr), (z_zextload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def z_zextloadi64 : PatFrag<(ops node:$ptr), (z_zextload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+// Extending (atomic) loads in which the extension type can be signed.
+def z_asextload : PatFrags<(ops node:$ptr),
+                           [(unindexedload node:$ptr),
+                            (atomic_load node:$ptr)], [{
+  ISD::LoadExtType ETy = getLoadExtType(N);
+  return ETy == ISD::EXTLOAD || ETy == ISD::SEXTLOAD;
+}]>;
+def z_asextloadi8 : PatFrag<(ops node:$ptr), (z_asextload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def z_asextloadi16 : PatFrag<(ops node:$ptr), (z_asextload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def z_asextloadi32 : PatFrag<(ops node:$ptr), (z_asextload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i32;
 }]>;
-// Extending loads in which the extension type doesn't matter.
+// Extending (atomic) loads in which the extension type can be unsigned.
+def z_azextload : PatFrags<(ops node:$ptr),
+                           [(unindexedload node:$ptr),
+                            (atomic_load node:$ptr)], [{
+  ISD::LoadExtType ETy = getLoadExtType(N);
+  return ETy == ISD::EXTLOAD || ETy == ISD::ZEXTLOAD;
+}]>;
+def z_azextloadi8 : PatFrag<(ops node:$ptr), (z_azextload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def z_azextloadi16 : PatFrag<(ops node:$ptr), (z_azextload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def z_azextloadi32 : PatFrag<(ops node:$ptr), (z_azextload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+// Extending (atomic) loads in which the extension type doesn't matter.
+def z_anyextload : PatFrags<(ops node:$ptr),
+                            [(unindexedload node:$ptr),
+                             (atomic_load node:$ptr)], [{
+  return getLoadExtType(N) != ISD::NON_EXTLOAD;
+}]>;
+def z_anyextloadi8 : PatFrag<(ops node:$ptr), (z_anyextload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def z_anyextloadi16 : PatFrag<(ops node:$ptr), (z_anyextload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def z_anyextloadi32 : PatFrag<(ops node:$ptr), (z_anyextload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def z_anyextloadi64 : PatFrag<(ops node:$ptr), (z_anyextload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+// Extending non-atomic loads in which the extension type doesn't matter.
 def anyextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
   return cast<LoadSDNode>(N)->getExtensionType() != ISD::NON_EXTLOAD;
 }]>;
@@ -578,15 +649,42 @@ def anyextloadi32 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{
   return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
 }]>;
+// Extending (atomic) loads that are not sign/zero extending.
+def z_extload : PatFrags<(ops node:$ptr),
+                         [(extload node:$ptr),
+                          (atomic_load node:$ptr)], [{
+  return getLoadExtType(N) == ISD::EXTLOAD;
+}]>;
+def z_extloadi8 : PatFrag<(ops node:$ptr), (z_extload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def z_extloadi16 : PatFrag<(ops node:$ptr), (z_extload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def z_extloadi32 : PatFrag<(ops node:$ptr), (z_extload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def z_extloadi64 : PatFrag<(ops node:$ptr), (z_extload node:$ptr), [{
+  return cast<MemSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+// Extending atomic FP loads.
+def z_any_extloadf32 : PatFrags<(ops node:$ptr),
+                                [(any_extloadf32 node:$ptr),
+                                 (any_fpextend (f32 (atomic_load node:$ptr)))]>;
+def z_any_extloadf64 : PatFrags<(ops node:$ptr),
+                                [(any_extloadf64 node:$ptr),
+                                 (any_fpextend (f64 (atomic_load node:$ptr)))]>;
+
 // Aligned loads.
 class AlignedLoad<SDPatternOperator load>
   : PatFrag<(ops node:$addr), (load node:$addr),
             [{ return storeLoadIsAligned(N); }]>;
-def aligned_load : AlignedLoad<load>;
-def aligned_asextloadi16 : AlignedLoad<asextloadi16>;
-def aligned_asextloadi32 : AlignedLoad<asextloadi32>;
-def aligned_azextloadi16 : AlignedLoad<azextloadi16>;
-def aligned_azextloadi32 : AlignedLoad<azextloadi32>;
+def aligned_z_load : AlignedLoad<z_load>;
+def aligned_z_asextloadi16 : AlignedLoad<z_asextloadi16>;
+def aligned_z_asextloadi32 : AlignedLoad<z_asextloadi32>;
+def aligned_z_azextloadi16 : AlignedLoad<z_azextloadi16>;
+def aligned_z_azextloadi32 : AlignedLoad<z_azextloadi32>;
 // Aligned stores.
 class AlignedStore
@@ -749,7 +847,7 @@ def z_any_vround : PatFrags<(ops node:$src),
 // Create a unary operator that loads from memory and then performs
 // the given operation on it.
-class loadu +class loadu : PatFrag<(ops node:$addr), (operator (load node:$addr))>; // Create a store operator that performs the given unary operation @@ -799,12 +897,12 @@ def imm32nobytes : PatLeaf<(i32 imm), [{ class z_replicate_load : PatFrag<(ops node:$addr), (z_replicate (scalartype (load node:$addr)))>; -def z_replicate_loadi8 : z_replicate_load; -def z_replicate_loadi16 : z_replicate_load; -def z_replicate_loadi32 : z_replicate_load; -def z_replicate_loadi64 : z_replicate_load; -def z_replicate_loadf32 : z_replicate_load; -def z_replicate_loadf64 : z_replicate_load; +def z_replicate_loadi8 : z_replicate_load; +def z_replicate_loadi16 : z_replicate_load; +def z_replicate_loadi32 : z_replicate_load; +def z_replicate_loadi64 : z_replicate_load; +def z_replicate_loadf32 : z_replicate_load; +def z_replicate_loadf64 : z_replicate_load; // Byte-swapped replicated vector element loads. def z_replicate_loadbswapi16 : z_replicate_load; def z_replicate_loadbswapi32 : z_replicate_load; @@ -815,12 +913,12 @@ class z_vle : PatFrag<(ops node:$vec, node:$addr, node:$index), (z_vector_insert node:$vec, (scalartype (load node:$addr)), node:$index)>; -def z_vlei8 : z_vle; -def z_vlei16 : z_vle; -def z_vlei32 : z_vle; -def z_vlei64 : z_vle; -def z_vlef32 : z_vle; -def z_vlef64 : z_vle; +def z_vlei8 : z_vle; +def z_vlei16 : z_vle; +def z_vlei32 : z_vle; +def z_vlei64 : z_vle; +def z_vlef32 : z_vle; +def z_vlef64 : z_vle; // Byte-swapped vector element loads. def z_vlebri16 : z_vle; def z_vlebri32 : z_vle; @@ -832,13 +930,13 @@ class z_vllez : PatFrag<(ops node:$addr), (z_vector_insert immAllZerosV, (scalartype (load node:$addr)), (i32 index))>; -def z_vllezi8 : z_vllez; -def z_vllezi16 : z_vllez; -def z_vllezi32 : z_vllez; +def z_vllezi8 : z_vllez; +def z_vllezi16 : z_vllez; +def z_vllezi32 : z_vllez; def z_vllezi64 : PatFrags<(ops node:$addr), [(z_vector_insert immAllZerosV, - (i64 (load node:$addr)), (i32 0)), - (z_join_dwords (i64 (load node:$addr)), (i64 0))]>; + (i64 (z_load node:$addr)), (i32 0)), + (z_join_dwords (i64 (z_load node:$addr)), (i64 0))]>; // We use high merges to form a v4f32 from four f32s. Propagating zero // into all elements but index 1 gives this expression. def z_vllezf32 : PatFrag<(ops node:$addr), @@ -848,23 +946,23 @@ def z_vllezf32 : PatFrag<(ops node:$addr), (v4i32 (bitconvert (v4f32 (scalar_to_vector - (f32 (load node:$addr)))))))), + (f32 (z_load node:$addr)))))))), (v2i64 (bitconvert (v4f32 immAllZerosV))))>; def z_vllezf64 : PatFrag<(ops node:$addr), (z_merge_high - (v2f64 (scalar_to_vector (f64 (load node:$addr)))), + (v2f64 (scalar_to_vector (f64 (z_load node:$addr)))), immAllZerosV)>; // Similarly for the high element of a zeroed vector. -def z_vllezli32 : z_vllez; +def z_vllezli32 : z_vllez; def z_vllezlf32 : PatFrag<(ops node:$addr), (z_merge_high (v2i64 (bitconvert (z_merge_high (v4f32 (scalar_to_vector - (f32 (load node:$addr)))), + (f32 (z_load node:$addr)))), (v4f32 immAllZerosV)))), (v2i64 (bitconvert (v4f32 immAllZerosV))))>; diff --git a/llvm/lib/Target/SystemZ/SystemZPatterns.td b/llvm/lib/Target/SystemZ/SystemZPatterns.td index 5e5dca77e9553..4d6bc68e9a7ed 100644 --- a/llvm/lib/Target/SystemZ/SystemZPatterns.td +++ b/llvm/lib/Target/SystemZ/SystemZPatterns.td @@ -49,8 +49,8 @@ class RMWI { - def : RMWI; - def : RMWI; + def : RMWI; + def : RMWI; } // Record that INSN performs insertion TYPE into a register of class CLS. 
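The z_load / z_sextload / z_zextload / z_asextload / z_azextload / z_anyextload / z_extload predicates above all rely on a getLoadExtType(N) helper that is not part of the hunks shown here. The sketch below illustrates what such a helper is assumed to look like; the exact name, signature and location (for example SystemZISelDAGToDAG.cpp, next to the other ISel predicate helpers such as storeLoadIsAligned) are assumptions for illustration, not taken from this diff.

#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

// Return the extension type of a load-like node, treating regular loads
// (LoadSDNode) and atomic loads (AtomicSDNode with opcode ISD::ATOMIC_LOAD,
// the only atomic opcode for which an extension type is defined) uniformly,
// so the TableGen predicates can share one code path for both.
static ISD::LoadExtType getLoadExtType(SDNode *N) {
  if (const auto *LD = dyn_cast<LoadSDNode>(N))
    return LD->getExtensionType();
  if (const auto *ALD = dyn_cast<AtomicSDNode>(N))
    if (ALD->getOpcode() == ISD::ATOMIC_LOAD)
      return ALD->getExtensionType();
  llvm_unreachable("Unexpected node type in getLoadExtType");
}

With such a helper, a fragment like (z_zextloadi32 node:$ptr) matches either an ordinary 32-bit zero-extending load or a 32-bit ISD::ATOMIC_LOAD whose extension type has been set to ZEXTLOAD, which is what lets the existing reg/mem instruction patterns apply to atomic loads as well.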
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td index 1e548d7c101a7..cbad5a0eafb27 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.td +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -1785,14 +1785,14 @@ defm : TRUNC64m; defm : TRUNC64m; defm : TRUNC64m; -// Atomic loads +// Atomic loads (FIXME: replace iAny with the correct integer VT:) multiclass ATMLDm { - def : Pat<(from ADDRrri:$addr), (torri MEMrri:$addr)>; - def : Pat<(from ADDRrii:$addr), (torii MEMrii:$addr)>; - def : Pat<(from ADDRzri:$addr), (tozri MEMzri:$addr)>; - def : Pat<(from ADDRzii:$addr), (tozii MEMzii:$addr)>; + def : Pat<(iAny (from ADDRrri:$addr)), (torri MEMrri:$addr)>; + def : Pat<(iAny (from ADDRrii:$addr)), (torii MEMrii:$addr)>; + def : Pat<(iAny (from ADDRzri:$addr)), (tozri MEMzri:$addr)>; + def : Pat<(iAny (from ADDRzii:$addr)), (tozii MEMzii:$addr)>; } defm : ATMLDm; defm : ATMLDm; diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-06.ll b/llvm/test/CodeGen/SystemZ/atomic-load-06.ll index c9c5504520345..60ff780df87b0 100644 --- a/llvm/test/CodeGen/SystemZ/atomic-load-06.ll +++ b/llvm/test/CodeGen/SystemZ/atomic-load-06.ll @@ -4,9 +4,7 @@ define float @f1(ptr %src) { ; CHECK-LABEL: f1: -; CHECK: lgf [[R:%r[0-9]+]], 0(%r2) -; CHECK: sllg [[R]], [[R]], 32 -; CHECK: ldgr %f0, [[R]] +; CHECK: le %f0, 0(%r2) ; CHECK: br %r14 %val = load atomic float, ptr %src seq_cst, align 4 ret float %val diff --git a/llvm/test/CodeGen/SystemZ/atomic-memops-fp128.ll b/llvm/test/CodeGen/SystemZ/atomic-memops-fp128.ll new file mode 100644 index 0000000000000..8038329c0e09a --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomic-memops-fp128.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s +; +; Test fpext of atomic loads to fp128 without VectorEnhancements1 (using FP register pairs). + +define fp128 @f1(ptr %src) { +; CHECK-LABEL: f1: +; CHECK: # %bb.0: +; CHECK-NEXT: lxeb %f0, 0(%r3) +; CHECK-NEXT: std %f0, 0(%r2) +; CHECK-NEXT: std %f2, 8(%r2) +; CHECK-NEXT: br %r14 + %V = load atomic float, ptr %src seq_cst, align 4 + %Res = fpext float %V to fp128 + ret fp128 %Res +} + +define fp128 @f2(ptr %src) { +; CHECK-LABEL: f2: +; CHECK: # %bb.0: +; CHECK-NEXT: lxdb %f0, 0(%r3) +; CHECK-NEXT: std %f0, 0(%r2) +; CHECK-NEXT: std %f2, 8(%r2) +; CHECK-NEXT: br %r14 + %V = load atomic double, ptr %src seq_cst, align 8 + %Res = fpext double %V to fp128 + ret fp128 %Res +} + + + diff --git a/llvm/test/CodeGen/SystemZ/atomic-memops.ll b/llvm/test/CodeGen/SystemZ/atomic-memops.ll new file mode 100644 index 0000000000000..0bc647aa0e0f7 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/atomic-memops.ll @@ -0,0 +1,739 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 | FileCheck %s + +; Sign-extending atomic loads. 
+define void @f1(ptr %src, ptr %dst) { +; CHECK-LABEL: f1: +; CHECK: # %bb.0: +; CHECK-NEXT: lb %r0, 0(%r2) +; CHECK-NEXT: sth %r0, 0(%r3) +; CHECK-NEXT: br %r14 + %b = load atomic i8, ptr %src seq_cst, align 1 + %s = sext i8 %b to i16 + store volatile i16 %s, ptr %dst + ret void +} + +define void @f2(ptr %src, ptr %dst) { +; CHECK-LABEL: f2: +; CHECK: # %bb.0: +; CHECK-NEXT: lb %r0, 0(%r2) +; CHECK-NEXT: st %r0, 0(%r3) +; CHECK-NEXT: br %r14 + %b = load atomic i8, ptr %src seq_cst, align 1 + %s = sext i8 %b to i32 + store volatile i32 %s, ptr %dst + ret void +} + +define void @f3(ptr %src, ptr %dst) { +; CHECK-LABEL: f3: +; CHECK: # %bb.0: +; CHECK-NEXT: lgb %r0, 0(%r2) +; CHECK-NEXT: stg %r0, 0(%r3) +; CHECK-NEXT: br %r14 + %b = load atomic i8, ptr %src seq_cst, align 1 + %s = sext i8 %b to i64 + store volatile i64 %s, ptr %dst + ret void +} + +define void @f4(ptr %src, ptr %dst) { +; CHECK-LABEL: f4: +; CHECK: # %bb.0: +; CHECK-NEXT: lh %r0, 0(%r2) +; CHECK-NEXT: st %r0, 0(%r3) +; CHECK-NEXT: br %r14 + %b = load atomic i16, ptr %src seq_cst, align 2 + %s = sext i16 %b to i32 + store volatile i32 %s, ptr %dst + ret void +} + +define void @f5(ptr %src, ptr %dst) { +; CHECK-LABEL: f5: +; CHECK: # %bb.0: +; CHECK-NEXT: lgh %r0, 0(%r2) +; CHECK-NEXT: stg %r0, 0(%r3) +; CHECK-NEXT: br %r14 + %b = load atomic i16, ptr %src seq_cst, align 2 + %s = sext i16 %b to i64 + store volatile i64 %s, ptr %dst + ret void +} + +define void @f6(ptr %src, ptr %dst) { +; CHECK-LABEL: f6: +; CHECK: # %bb.0: +; CHECK-NEXT: lgf %r0, 0(%r2) +; CHECK-NEXT: stg %r0, 0(%r3) +; CHECK-NEXT: br %r14 + %b = load atomic i32, ptr %src seq_cst, align 4 + %s = sext i32 %b to i64 + store volatile i64 %s, ptr %dst + ret void +} + +; Zero-extending atomic loads. +define void @f7(ptr %src, ptr %dst) { +; CHECK-LABEL: f7: +; CHECK: # %bb.0: +; CHECK-NEXT: llc %r0, 0(%r2) +; CHECK-NEXT: sth %r0, 0(%r3) +; CHECK-NEXT: br %r14 + %b = load atomic i8, ptr %src seq_cst, align 1 + %z = zext i8 %b to i16 + store volatile i16 %z, ptr %dst + ret void +} + +define void @f8(ptr %src, ptr %dst) { +; CHECK-LABEL: f8: +; CHECK: # %bb.0: +; CHECK-NEXT: llc %r0, 0(%r2) +; CHECK-NEXT: st %r0, 0(%r3) +; CHECK-NEXT: br %r14 + %b = load atomic i8, ptr %src seq_cst, align 1 + %z = zext i8 %b to i32 + store volatile i32 %z, ptr %dst + ret void +} + +define void @f9(ptr %src, ptr %dst) { +; CHECK-LABEL: f9: +; CHECK: # %bb.0: +; CHECK-NEXT: llgc %r0, 0(%r2) +; CHECK-NEXT: stg %r0, 0(%r3) +; CHECK-NEXT: br %r14 + %b = load atomic i8, ptr %src seq_cst, align 1 + %z = zext i8 %b to i64 + store volatile i64 %z, ptr %dst + ret void +} + +define void @f10(ptr %src, ptr %dst) { +; CHECK-LABEL: f10: +; CHECK: # %bb.0: +; CHECK-NEXT: llh %r0, 0(%r2) +; CHECK-NEXT: st %r0, 0(%r3) +; CHECK-NEXT: br %r14 + %b = load atomic i16, ptr %src seq_cst, align 2 + %z = zext i16 %b to i32 + store volatile i32 %z, ptr %dst + ret void +} + +define void @f11(ptr %src, ptr %dst) { +; CHECK-LABEL: f11: +; CHECK: # %bb.0: +; CHECK-NEXT: llgh %r0, 0(%r2) +; CHECK-NEXT: stg %r0, 0(%r3) +; CHECK-NEXT: br %r14 + %b = load atomic i16, ptr %src seq_cst, align 2 + %z = zext i16 %b to i64 + store volatile i64 %z, ptr %dst + ret void +} + +define void @f12(ptr %src, ptr %dst) { +; CHECK-LABEL: f12: +; CHECK: # %bb.0: +; CHECK-NEXT: llgf %r0, 0(%r2) +; CHECK-NEXT: stg %r0, 0(%r3) +; CHECK-NEXT: br %r14 + %b = load atomic i32, ptr %src seq_cst, align 4 + %z = zext i32 %b to i64 + store volatile i64 %z, ptr %dst + ret void +} + +; reg/mem +define i64 @f13(i64 %a, ptr %src) { +; CHECK-LABEL: 
f13: +; CHECK: # %bb.0: +; CHECK-NEXT: ag %r2, 0(%r3) +; CHECK-NEXT: br %r14 + %b = load atomic i64, ptr %src seq_cst, align 8 + %add = add i64 %a, %b + ret i64 %add +} + +; reg/mem op with extension from memory. +define i64 @f14(i64 %a, ptr %src) { +; CHECK-LABEL: f14: +; CHECK: # %bb.0: +; CHECK-NEXT: slgf %r2, 0(%r3) +; CHECK-NEXT: br %r14 + %b = load atomic i32, ptr %src seq_cst, align 4 + %bext = zext i32 %b to i64 + %sub = sub i64 %a, %bext + ret i64 %sub +} + +define float @f15(float %f1, ptr %ptr, float %acc) { +; CHECK-LABEL: f15: +; CHECK: # %bb.0: +; CHECK-NEXT: maeb %f2, %f0, 0(%r2) +; CHECK-NEXT: ldr %f0, %f2 +; CHECK-NEXT: br %r14 + %f2 = load atomic float, ptr %ptr seq_cst, align 4 + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc) + ret float %res +} +declare float @llvm.fma.f32(float %f1, float %f2, float %f3) + +define double @f15_b(ptr %src) { +; CHECK-LABEL: f15_b: +; CHECK: # %bb.0: +; CHECK-NEXT: ldeb %f0, 0(%r2) +; CHECK-NEXT: br %r14 + %V = load atomic float, ptr %src seq_cst, align 4 + %Res = fpext float %V to double + ret double %Res +} + +define fp128 @f15_c(ptr %src) { +; CHECK-LABEL: f15_c: +; CHECK: # %bb.0: +; CHECK-NEXT: lde %f0, 0(%r3) +; CHECK-NEXT: ldebr %f0, %f0 +; CHECK-NEXT: wflld %v0, %f0 +; CHECK-NEXT: vst %v0, 0(%r2), 3 +; CHECK-NEXT: br %r14 + %V = load atomic float, ptr %src seq_cst, align 4 + %Res = fpext float %V to fp128 + ret fp128 %Res +} + +define fp128 @f15_d(ptr %src) { +; CHECK-LABEL: f15_d: +; CHECK: # %bb.0: +; CHECK-NEXT: ld %f0, 0(%r3) +; CHECK-NEXT: wflld %v0, %f0 +; CHECK-NEXT: vst %v0, 0(%r2), 3 +; CHECK-NEXT: br %r14 + %V = load atomic double, ptr %src seq_cst, align 8 + %Res = fpext double %V to fp128 + ret fp128 %Res +} + +; Do it twice for good measure given the involved DAG combines. +define void @f16(ptr %src, ptr %dst) { +; CHECK-LABEL: f16: +; CHECK: # %bb.0: +; CHECK-NEXT: llgc %r0, 0(%r2) +; CHECK-NEXT: lgbr %r1, %r0 +; CHECK-NEXT: stg %r1, 0(%r3) +; CHECK-NEXT: stg %r0, 0(%r3) +; CHECK-NEXT: llgc %r0, 0(%r2) +; CHECK-NEXT: lgbr %r1, %r0 +; CHECK-NEXT: stg %r1, 0(%r3) +; CHECK-NEXT: stg %r0, 0(%r3) +; CHECK-NEXT: br %r14 + %b = load atomic i8, ptr %src seq_cst, align 1 + %s = sext i8 %b to i64 + %z = zext i8 %b to i64 + store volatile i64 %s, ptr %dst + store volatile i64 %z, ptr %dst + + %b2 = load atomic i8, ptr %src seq_cst, align 1 + %s2 = sext i8 %b2 to i64 + %z2 = zext i8 %b2 to i64 + store volatile i64 %s2, ptr %dst + store volatile i64 %z2, ptr %dst + + ret void +} + +define void @f16_b(ptr %src, ptr %dst) { +; CHECK-LABEL: f16_b: +; CHECK: # %bb.0: +; CHECK-NEXT: lgb %r0, 0(%r2) +; CHECK-NEXT: sth %r0, 0(%r3) +; CHECK-NEXT: stg %r0, 0(%r3) +; CHECK-NEXT: br %r14 + %b = load atomic i8, ptr %src seq_cst, align 1 + %s = sext i8 %b to i16 + store volatile i16 %s, ptr %dst + + %s2 = sext i8 %b to i64 + store volatile i64 %s2, ptr %dst + + ret void +} + +define void @f16_c(ptr %src, ptr %dst) { +; CHECK-LABEL: f16_c: +; CHECK: # %bb.0: +; CHECK-NEXT: llgc %r0, 0(%r2) +; CHECK-NEXT: sth %r0, 0(%r3) +; CHECK-NEXT: stg %r0, 0(%r3) +; CHECK-NEXT: br %r14 + %b = load atomic i8, ptr %src seq_cst, align 1 + %z = zext i8 %b to i16 + store volatile i16 %z, ptr %dst + + %z2 = zext i8 %b to i64 + store volatile i64 %z2, ptr %dst + + ret void +} + +; Check that two i8 loads use a reg/reg op. 
+define i8 @f16_d(ptr %src, ptr %src2) { +; CHECK-LABEL: f16_d: +; CHECK: # %bb.0: +; CHECK-NEXT: lb %r2, 0(%r2) +; CHECK-NEXT: lb %r0, 0(%r3) +; CHECK-NEXT: ar %r2, %r0 +; CHECK-NEXT: br %r14 + %b = load atomic i8, ptr %src seq_cst, align 1 + %b2 = load atomic i8, ptr %src2 seq_cst, align 1 + %add = add i8 %b, %b2 + ret i8 %add +} + +; Binary operations on a byte in memory, with an atomic load. +define void @f17(ptr %ptr) { +; CHECK-LABEL: f17: +; CHECK: # %bb.0: +; CHECK-NEXT: ni 0(%r2), 1 +; CHECK-NEXT: br %r14 + %val = load atomic i8, ptr %ptr seq_cst, align 1 + %xor = and i8 %val, -255 + store i8 %xor, ptr %ptr + ret void +} + +define void @f18(ptr %src) { +; CHECK-LABEL: f18: +; CHECK: # %bb.0: +; CHECK-NEXT: oiy 4096(%r2), 1 +; CHECK-NEXT: br %r14 + %ptr = getelementptr i8, ptr %src, i64 4096 + %val = load atomic i8, ptr %ptr seq_cst, align 1 + %xor = or i8 %val, -255 + store i8 %xor, ptr %ptr + ret void +} + +define void @f19(ptr %src) { +; CHECK-LABEL: f19: +; CHECK: # %bb.0: +; CHECK-NEXT: xi 4095(%r2), 1 +; CHECK-NEXT: br %r14 + %ptr = getelementptr i8, ptr %src, i64 4095 + %val = load atomic i8, ptr %ptr seq_cst, align 1 + %xor = xor i8 %val, -255 + store i8 %xor, ptr %ptr + ret void +} + +; TM +define double @f20(ptr %src, double %a, double %b) { +; CHECK-LABEL: f20: +; CHECK: # %bb.0: +; CHECK-NEXT: tm 0(%r2), 1 +; CHECK-NEXT: je .LBB25_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ldr %f2, %f0 +; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: ldr %f0, %f2 +; CHECK-NEXT: br %r14 + %byte = load atomic i8, ptr %src seq_cst, align 1 + %and = and i8 %byte, 1 + %cmp = icmp eq i8 %and, 0 + %res = select i1 %cmp, double %b, double %a + ret double %res +} + +; vector load and replicate +define void @f21(ptr %src, ptr %dst) { +; CHECK-LABEL: f21: +; CHECK: # %bb.0: +; CHECK-NEXT: vlrepb %v0, 0(%r2) +; CHECK-NEXT: vst %v0, 0(%r3), 3 +; CHECK-NEXT: br %r14 + %b = load atomic i8, ptr %src seq_cst, align 1 + %v = insertelement <16 x i8> undef, i8 %b, i32 1 + store volatile <16 x i8> %v, ptr %dst + ret void +} + +define void @f22(ptr %src, ptr %dst) { +; CHECK-LABEL: f22: +; CHECK: # %bb.0: +; CHECK-NEXT: vlreph %v0, 0(%r2) +; CHECK-NEXT: vst %v0, 0(%r3), 3 +; CHECK-NEXT: br %r14 + %b = load atomic i16, ptr %src seq_cst, align 2 + %v = insertelement <8 x i16> undef, i16 %b, i32 1 + store volatile <8 x i16> %v, ptr %dst + ret void +} + +define void @f23(ptr %src, ptr %dst) { +; CHECK-LABEL: f23: +; CHECK: # %bb.0: +; CHECK-NEXT: vlrepf %v0, 0(%r2) +; CHECK-NEXT: vst %v0, 0(%r3), 3 +; CHECK-NEXT: br %r14 + %b = load atomic i32, ptr %src seq_cst, align 4 + %v = insertelement <4 x i32> undef, i32 %b, i32 2 + store volatile <4 x i32> %v, ptr %dst + ret void +} + +define void @f24(ptr %src, ptr %dst) { +; CHECK-LABEL: f24: +; CHECK: # %bb.0: +; CHECK-NEXT: vlrepg %v0, 0(%r2) +; CHECK-NEXT: vst %v0, 0(%r3), 3 +; CHECK-NEXT: br %r14 + %b = load atomic i64, ptr %src seq_cst, align 8 + %v = insertelement <2 x i64> undef, i64 %b, i32 0 + store volatile <2 x i64> %v, ptr %dst + ret void +} + +define void @f25(ptr %src, ptr %dst) { +; CHECK-LABEL: f25: +; CHECK: # %bb.0: +; CHECK-NEXT: vlrepf %v0, 0(%r2) +; CHECK-NEXT: vst %v0, 0(%r3), 3 +; CHECK-NEXT: br %r14 + %b = load atomic float, ptr %src seq_cst, align 4 + %v = insertelement <4 x float> undef, float %b, i32 1 + store volatile <4 x float> %v, ptr %dst + ret void +} + +; Do *not* use vlrep for an extending load. 
+define <4 x i32> @f25_c(ptr %ptr) { +; CHECK-LABEL: f25_c: +; CHECK: # %bb.0: +; CHECK-NEXT: lb %r0, 0(%r2) +; CHECK-NEXT: vlvgp %v0, %r0, %r0 +; CHECK-NEXT: vrepf %v24, %v0, 1 +; CHECK-NEXT: br %r14 + %L = load atomic i8, ptr %ptr seq_cst, align 4 + %S = sext i8 %L to i32 + %val = insertelement <4 x i32> undef, i32 %S, i32 0 + %ret = shufflevector <4 x i32> %val, <4 x i32> undef, + <4 x i32> zeroinitializer + ret <4 x i32> %ret +} + +; Do *not* use vlrep if there is another scalar use. +define <4 x i32> @f25_d(ptr %ptr, ptr %dst) { +; CHECK-LABEL: f25_d: +; CHECK: # %bb.0: +; CHECK-NEXT: l %r0, 0(%r2) +; CHECK-NEXT: vlvgp %v0, %r0, %r0 +; CHECK-NEXT: vrepf %v24, %v0, 1 +; CHECK-NEXT: st %r0, 0(%r3) +; CHECK-NEXT: br %r14 + %L = load atomic i32, ptr %ptr seq_cst, align 4 + store i32 %L, ptr %dst, align 4 + %val = insertelement <4 x i32> undef, i32 %L, i32 0 + %ret = shufflevector <4 x i32> %val, <4 x i32> undef, + <4 x i32> zeroinitializer + ret <4 x i32> %ret +} + +define void @f26(ptr %src, ptr %dst) { +; CHECK-LABEL: f26: +; CHECK: # %bb.0: +; CHECK-NEXT: vlrepg %v0, 0(%r2) +; CHECK-NEXT: vst %v0, 0(%r3), 3 +; CHECK-NEXT: br %r14 + %b = load atomic double, ptr %src seq_cst, align 8 + %v = insertelement <2 x double> undef, double %b, i32 0 + store volatile <2 x double> %v, ptr %dst + ret void +} + +; Vector Load logical element and zero. +define <16 x i8> @f27(ptr %ptr) { +; CHECK-LABEL: f27: +; CHECK: # %bb.0: +; CHECK-NEXT: vllezb %v24, 0(%r2) +; CHECK-NEXT: br %r14 + %val = load atomic i8, ptr %ptr seq_cst, align 1 + %ret = insertelement <16 x i8> zeroinitializer, i8 %val, i32 7 + ret <16 x i8> %ret +} + +define <8 x i16> @f28(ptr %ptr) { +; CHECK-LABEL: f28: +; CHECK: # %bb.0: +; CHECK-NEXT: vllezh %v24, 0(%r2) +; CHECK-NEXT: br %r14 + %val = load atomic i16, ptr %ptr seq_cst, align 2 + %ret = insertelement <8 x i16> zeroinitializer, i16 %val, i32 3 + ret <8 x i16> %ret +} + +define <4 x i32> @f29(ptr %ptr) { +; CHECK-LABEL: f29: +; CHECK: # %bb.0: +; CHECK-NEXT: vllezf %v24, 0(%r2) +; CHECK-NEXT: br %r14 + %val = load atomic i32, ptr %ptr seq_cst, align 4 + %ret = insertelement <4 x i32> zeroinitializer, i32 %val, i32 1 + ret <4 x i32> %ret +} + +define <2 x i64> @f30(ptr %ptr) { +; CHECK-LABEL: f30: +; CHECK: # %bb.0: +; CHECK-NEXT: vllezg %v24, 0(%r2) +; CHECK-NEXT: br %r14 + %val = load atomic i64, ptr %ptr seq_cst, align 8 + %ret = insertelement <2 x i64> zeroinitializer, i64 %val, i32 0 + ret <2 x i64> %ret +} + +define <4 x i32> @f31(ptr %ptr) { +; CHECK-LABEL: f31: +; CHECK: # %bb.0: +; CHECK-NEXT: vllezlf %v24, 0(%r2) +; CHECK-NEXT: br %r14 + %val = load atomic i32, ptr %ptr seq_cst, align 4 + %ret = insertelement <4 x i32> zeroinitializer, i32 %val, i32 0 + ret <4 x i32> %ret +} + +define <4 x float> @f32(ptr %ptr) { +; CHECK-LABEL: f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vllezlf %v24, 0(%r2) +; CHECK-NEXT: br %r14 + %val = load atomic float, ptr %ptr seq_cst, align 4 + %ret = insertelement <4 x float> zeroinitializer, float %val, i32 0 + ret <4 x float> %ret +} + +; Vector Load element. 
+define <16 x i8> @f33(<16 x i8> %val, ptr %ptr) { +; CHECK-LABEL: f33: +; CHECK: # %bb.0: +; CHECK-NEXT: vleb %v24, 0(%r2), 0 +; CHECK-NEXT: br %r14 + %element = load atomic i8, ptr %ptr seq_cst, align 1 + %ret = insertelement <16 x i8> %val, i8 %element, i32 0 + ret <16 x i8> %ret +} + +define <8 x i16> @f34(<8 x i16> %val, ptr %ptr) { +; CHECK-LABEL: f34: +; CHECK: # %bb.0: +; CHECK-NEXT: vleh %v24, 0(%r2), 0 +; CHECK-NEXT: br %r14 + %element = load atomic i16, ptr %ptr seq_cst, align 2 + %ret = insertelement <8 x i16> %val, i16 %element, i32 0 + ret <8 x i16> %ret +} + +define <4 x i32> @f35(<4 x i32> %val, ptr %ptr) { +; CHECK-LABEL: f35: +; CHECK: # %bb.0: +; CHECK-NEXT: vlef %v24, 0(%r2), 0 +; CHECK-NEXT: br %r14 + %element = load atomic i32, ptr %ptr seq_cst, align 4 + %ret = insertelement <4 x i32> %val, i32 %element, i32 0 + ret <4 x i32> %ret +} + +define <2 x i64> @f36(<2 x i64> %val, ptr %ptr) { +; CHECK-LABEL: f36: +; CHECK: # %bb.0: +; CHECK-NEXT: vleg %v24, 0(%r2), 0 +; CHECK-NEXT: br %r14 + %element = load atomic i64, ptr %ptr seq_cst, align 8 + %ret = insertelement <2 x i64> %val, i64 %element, i32 0 + ret <2 x i64> %ret +} + +; Test operation on memory involving atomic load and store. +define void @f39(ptr %ptr) { +; CHECK-LABEL: f39: +; CHECK: # %bb.0: +; CHECK-NEXT: oi 0(%r2), 1 +; CHECK-NEXT: bcr 14, %r0 +; CHECK-NEXT: br %r14 + %val = load atomic i8, ptr %ptr seq_cst, align 1 + %or = or i8 %val, -255 + store atomic i8 %or, ptr %ptr seq_cst, align 1 + ret void +} + +; Some atomic stores of immediates. +define void @f40(ptr %ptr) { +; CHECK-LABEL: f40: +; CHECK: # %bb.0: +; CHECK-NEXT: mvi 0(%r2), 128 +; CHECK-NEXT: bcr 14, %r0 +; CHECK-NEXT: br %r14 + store atomic i8 128, ptr %ptr seq_cst, align 1 + ret void +} + +define void @f41(ptr %ptr) { +; CHECK-LABEL: f41: +; CHECK: # %bb.0: +; CHECK-NEXT: mvhi 0(%r2), -1 +; CHECK-NEXT: bcr 14, %r0 +; CHECK-NEXT: br %r14 + store atomic i32 4294967295, ptr %ptr seq_cst, align 4 + ret void +} + +define void @f42(ptr %ptr) { +; CHECK-LABEL: f42: +; CHECK: # %bb.0: +; CHECK-NEXT: mvhi 0(%r2), -1 +; CHECK-NEXT: bcr 14, %r0 +; CHECK-NEXT: br %r14 + store atomic i32 4294967295, ptr %ptr seq_cst, align 4 + ret void +} + +define void @f43(ptr %ptr) { +; CHECK-LABEL: f43: +; CHECK: # %bb.0: +; CHECK-NEXT: llihl %r0, 255 +; CHECK-NEXT: oilf %r0, 4294967295 +; CHECK-NEXT: stg %r0, 0(%r2) +; CHECK-NEXT: bcr 14, %r0 +; CHECK-NEXT: br %r14 + store atomic i64 1099511627775, ptr %ptr seq_cst, align 8 + ret void +} + +define void @f44(ptr %ptr) { +; CHECK-LABEL: f44: +; CHECK: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI49_0 +; CHECK-NEXT: ld %f0, 0(%r1) +; CHECK-NEXT: std %f0, 0(%r2) +; CHECK-NEXT: bcr 14, %r0 +; CHECK-NEXT: br %r14 + store atomic double 0x3ff0000020000000, ptr %ptr seq_cst, align 8 + ret void +} + +; Vector Store Element. 
+define void @f45(<16 x i8> %val, ptr %ptr) { +; CHECK-LABEL: f45: +; CHECK: # %bb.0: +; CHECK-NEXT: vsteb %v24, 0(%r2), 0 +; CHECK-NEXT: bcr 14, %r0 +; CHECK-NEXT: br %r14 + %element = extractelement <16 x i8> %val, i32 0 + store atomic i8 %element, ptr %ptr seq_cst, align 1 + ret void +} + +define void @f46(<8 x i16> %val, ptr %base) { +; CHECK-LABEL: f46: +; CHECK: # %bb.0: +; CHECK-NEXT: vsteh %v24, 4094(%r2), 5 +; CHECK-NEXT: bcr 14, %r0 +; CHECK-NEXT: br %r14 + %ptr = getelementptr i16, ptr %base, i32 2047 + %element = extractelement <8 x i16> %val, i32 5 + store atomic i16 %element, ptr %ptr seq_cst, align 2 + ret void +} + +define void @f47(<4 x i32> %val, ptr %ptr) { +; CHECK-LABEL: f47: +; CHECK: # %bb.0: +; CHECK-NEXT: vstef %v24, 0(%r2), 3 +; CHECK-NEXT: bcr 14, %r0 +; CHECK-NEXT: br %r14 + %element = extractelement <4 x i32> %val, i32 3 + store atomic i32 %element, ptr %ptr seq_cst, align 4 + ret void +} + +define void @f48(<2 x i64> %val, ptr %ptr) { +; CHECK-LABEL: f48: +; CHECK: # %bb.0: +; CHECK-NEXT: vsteg %v24, 0(%r2), 1 +; CHECK-NEXT: bcr 14, %r0 +; CHECK-NEXT: br %r14 + %element = extractelement <2 x i64> %val, i32 1 + store atomic i64 %element, ptr %ptr seq_cst, align 8 + ret void +} + +define void @f49(<4 x float> %val, ptr %ptr) { +; CHECK-LABEL: f49: +; CHECK: # %bb.0: +; CHECK-NEXT: vstef %v24, 0(%r2), 0 +; CHECK-NEXT: bcr 14, %r0 +; CHECK-NEXT: br %r14 + %element = extractelement <4 x float> %val, i32 0 + store atomic float %element, ptr %ptr seq_cst, align 4 + ret void +} + +define void @f50(<2 x double> %val, ptr %ptr) { +; CHECK-LABEL: f50: +; CHECK: # %bb.0: +; CHECK-NEXT: vsteg %v24, 0(%r2), 1 +; CHECK-NEXT: bcr 14, %r0 +; CHECK-NEXT: br %r14 + %element = extractelement <2 x double> %val, i32 1 + store atomic double %element, ptr %ptr seq_cst, align 8 + ret void +} + +define void @f51(ptr %src, ptr %dst) { +; CHECK-LABEL: f51: +; CHECK: # %bb.0: +; CHECK-NEXT: lpq %r0, 0(%r2) +; CHECK-NEXT: vlvgp %v0, %r0, %r1 +; CHECK-NEXT: vgmf %v1, 2, 8 +; CHECK-NEXT: aebr %f0, %f1 +; CHECK-NEXT: ste %f0, 0(%r3) +; CHECK-NEXT: bcr 14, %r0 +; CHECK-NEXT: br %r14 + %atomic-load = load atomic i128, ptr %src seq_cst, align 16 + %b0 = bitcast i128 %atomic-load to <4 x float> + %vecext = extractelement <4 x float> %b0, i64 0 + %add = fadd float %vecext, 1.000000e+00 + store atomic float %add, ptr %dst seq_cst, align 4 + ret void +} + +define void @f52(ptr %src, ptr %dst) { +; CHECK-LABEL: f52: +; CHECK: # %bb.0: +; CHECK-NEXT: lpq %r0, 0(%r2) +; CHECK-NEXT: vlvgp %v0, %r0, %r1 +; CHECK-NEXT: vgmg %v1, 2, 11 +; CHECK-NEXT: adbr %f0, %f1 +; CHECK-NEXT: std %f0, 0(%r3) +; CHECK-NEXT: bcr 14, %r0 +; CHECK-NEXT: br %r14 + %atomic-load = load atomic i128, ptr %src seq_cst, align 16 + %b0 = bitcast i128 %atomic-load to <2 x double> + %vecext = extractelement <2 x double> %b0, i64 0 + %add = fadd double %vecext, 1.000000e+00 + store atomic double %add, ptr %dst seq_cst, align 8 + ret void +} + +define void @fun58(ptr %ptr, i64 %arg) { +; CHECK-LABEL: fun58: +; CHECK: # %bb.0: +; CHECK-NEXT: st %r3, 0(%r2) +; CHECK-NEXT: bcr 14, %r0 +; CHECK-NEXT: br %r14 + %res = trunc i64 %arg to i32 + store atomic i32 %res, ptr %ptr seq_cst, align 4 + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/atomic-store-06.ll b/llvm/test/CodeGen/SystemZ/atomic-store-06.ll index b748bfc767a4d..91e324b0af1a9 100644 --- a/llvm/test/CodeGen/SystemZ/atomic-store-06.ll +++ b/llvm/test/CodeGen/SystemZ/atomic-store-06.ll @@ -6,10 +6,7 @@ define void @f1(ptr %src, float %val) { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: 
-; CHECK-NEXT: # kill: def $f0s killed $f0s def $f0d
-; CHECK-NEXT: lgdr %r0, %f0
-; CHECK-NEXT: srlg %r0, %r0, 32
-; CHECK-NEXT: st %r0, 0(%r2)
+; CHECK-NEXT: ste %f0, 0(%r2)
 ; CHECK-NEXT: bcr 15, %r0
 ; CHECK-NEXT: br %r14
   store atomic float %val, ptr %src seq_cst, align 4
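The extending-load tests above depend on the surrounding sext/zext being folded into the ISD::ATOMIC_LOAD node itself, so that the node carries a SEXTLOAD/ZEXTLOAD extension type and the z_sextload*/z_zextload* fragments can match it. The snippet below is only a rough sketch of how such a fold could be written against the AtomicSDNode extension-type accessors; the function name, placement and legality checks are invented for illustration, and this is not the actual combine used by the patch.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Sketch: fold (zero_extend (atomic_load p)) into a single zero-extending
// ATOMIC_LOAD of the wider result type.  A sign extension would be handled
// the same way with ISD::SEXTLOAD.
static SDValue foldZExtOfAtomicLoad(SDNode *ZExt, SelectionDAG &DAG) {
  SDValue Src = ZExt->getOperand(0);
  auto *ALoad = dyn_cast<AtomicSDNode>(Src.getNode());
  if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD || !Src.hasOneUse() ||
      ALoad->getExtensionType() != ISD::NON_EXTLOAD)
    return SDValue();

  EVT VT = ZExt->getValueType(0);
  SDLoc DL(ZExt);
  // Rebuild the atomic load with the wider result type and record that the
  // high bits are produced by zero extension.
  SDValue New = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, ALoad->getMemoryVT(), VT,
                              ALoad->getChain(), ALoad->getBasePtr(),
                              ALoad->getMemOperand());
  cast<AtomicSDNode>(New.getNode())->setExtensionType(ISD::ZEXTLOAD);
  // Keep chain users of the original load working; the caller is expected to
  // replace the zero_extend itself with the returned value.
  DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), New.getValue(1));
  return New;
}

On SystemZ the effect is visible in, for example, f12 above, where the zero-extending atomic i32 load selects a single llgf rather than a plain load followed by a separate register extension.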