diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index af82b6cdb1809..301e92c8a9d31 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -7662,238 +7662,212 @@ static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG) { DAG->RemoveDeadNode(InitialADDI.getNode()); } -void PPCDAGToDAGISel::PeepholePPC64() { - SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); - - while (Position != CurDAG->allnodes_begin()) { - SDNode *N = &*--Position; - // Skip dead nodes and any non-machine opcodes. - if (N->use_empty() || !N->isMachineOpcode()) - continue; +static bool isValidMemOp(SDNode *N, bool &IsLoad, MaybeAlign &ExtraAlign) { + switch (N->getMachineOpcode()) { + default: + return false; + // Global must be word-aligned for LD, STD, LWA. + case PPC::LWA: + case PPC::LD: + case PPC::DFLOADf64: + case PPC::DFLOADf32: + ExtraAlign = Align(4); + [[fallthrough]]; + case PPC::LBZ: + case PPC::LBZ8: + case PPC::LFD: + case PPC::LFS: + case PPC::LHA: + case PPC::LHA8: + case PPC::LHZ: + case PPC::LHZ8: + case PPC::LWZ: + case PPC::LWZ8: + IsLoad = true; + return true; + case PPC::STD: + case PPC::DFSTOREf64: + case PPC::DFSTOREf32: + ExtraAlign = Align(4); + [[fallthrough]]; + case PPC::STB: + case PPC::STB8: + case PPC::STFD: + case PPC::STFS: + case PPC::STH: + case PPC::STH8: + case PPC::STW: + case PPC::STW8: + return true; + } +} - if (isVSXSwap(SDValue(N, 0))) - reduceVSXSwap(N, CurDAG); +static bool isMemBaseCombinable(SDValue Base) { + if (!Base.isMachineOpcode()) + return false; + switch (Base.getMachineOpcode()) { + default: + return false; + case PPC::ADDI8: + case PPC::ADDI: + case PPC::ADDIdtprelL: + case PPC::ADDItlsldL: + case PPC::ADDItocL8: + case PPC::ADDItoc: + case PPC::ADDItoc8: + return true; + } +} - // This optimization is performed for non-TOC-based local-exec accesses. 
- foldADDIForLocalExecAccesses(N, CurDAG); +static void peepholeMemOffset(SDNode *N, SelectionDAG *DAG, + const PPCSubtarget *Subtarget) { + // TODO: Enable for AIX 32-bit. + if (!Subtarget->isPPC64()) + return; - unsigned FirstOp; - unsigned StorageOpcode = N->getMachineOpcode(); - bool RequiresMod4Offset = false; + bool IsLoad = false; + MaybeAlign ExtraAlign; + if (!isValidMemOp(N, IsLoad, ExtraAlign)) + return; - switch (StorageOpcode) { - default: continue; + SDValue MemBase = N->getOperand(IsLoad ? 1 : 2); + if (!isMemBaseCombinable(MemBase)) + return; - case PPC::LWA: - case PPC::LD: - case PPC::DFLOADf64: - case PPC::DFLOADf32: - RequiresMod4Offset = true; - [[fallthrough]]; - case PPC::LBZ: - case PPC::LBZ8: - case PPC::LFD: - case PPC::LFS: - case PPC::LHA: - case PPC::LHA8: - case PPC::LHZ: - case PPC::LHZ8: - case PPC::LWZ: - case PPC::LWZ8: - FirstOp = 0; - break; + // Only additions with constant offsets will be folded. + auto *MemOffset = dyn_cast(N->getOperand(IsLoad ? 0 : 1)); + if (!MemOffset) + return; - case PPC::STD: - case PPC::DFSTOREf64: - case PPC::DFSTOREf32: - RequiresMod4Offset = true; - [[fallthrough]]; - case PPC::STB: - case PPC::STB8: - case PPC::STFD: - case PPC::STFS: - case PPC::STH: - case PPC::STH8: - case PPC::STW: - case PPC::STW8: - FirstOp = 1; - break; - } + SDValue ImmOp, RegOp; + unsigned BaseOpc = MemBase.getMachineOpcode(); + assert(MemBase.getNumOperands() == 2 && "Invalid base of memop with offset!"); - // If this is a load or store with a zero offset, or within the alignment, - // we may be able to fold an add-immediate into the memory operation. - // The check against alignment is below, as it can't occur until we check - // the arguments to N - if (!isa(N->getOperand(FirstOp))) - continue; + // ADDItoc and ADDItoc8 ('la') put the register in the second operand.
+ if (BaseOpc == PPC::ADDItoc || BaseOpc == PPC::ADDItoc8) { + ImmOp = MemBase.getOperand(0); + RegOp = MemBase.getOperand(1); + } else { + ImmOp = MemBase.getOperand(1); + RegOp = MemBase.getOperand(0); + } - SDValue Base = N->getOperand(FirstOp + 1); - if (!Base.isMachineOpcode()) - continue; + MaybeAlign ImmAlign; + if (auto *GA = dyn_cast(ImmOp)) + ImmAlign = GA->getGlobal()->getPointerAlignment(DAG->getDataLayout()); + else if (auto *CP = dyn_cast(ImmOp)) + ImmAlign = CP->getAlign(); - unsigned Flags = 0; - bool ReplaceFlags = true; + if (ImmAlign && ExtraAlign && ImmAlign.value() < ExtraAlign.value()) + return; - // When the feeding operation is an add-immediate of some sort, - // determine whether we need to add relocation information to the - // target flags on the immediate operand when we fold it into the - // load instruction. - // - // For something like ADDItocL8, the relocation information is - // inferred from the opcode; when we process it in the AsmPrinter, - // we add the necessary relocation there. A load, though, can receive - // relocation from various flavors of ADDIxxx, so we need to carry - // the relocation information in the target flags. - switch (Base.getMachineOpcode()) { - default: continue; - - case PPC::ADDI8: - case PPC::ADDI: - // In some cases (such as TLS) the relocation information - // is already in place on the operand, so copying the operand - // is sufficient. - ReplaceFlags = false; - break; - case PPC::ADDIdtprelL: - Flags = PPCII::MO_DTPREL_LO; - break; - case PPC::ADDItlsldL: - Flags = PPCII::MO_TLSLD_LO; - break; - case PPC::ADDItocL8: - Flags = PPCII::MO_TOC_LO; - break; + // On PPC64, the TOC base pointer is guaranteed by the ABI only to have + // 8-byte alignment, and so we can only use offsets less than 8 (otherwise, + // we might have needed different @ha relocation values for the offset + // pointers). 
+ int MaxDisplacement = 7; + if (ImmAlign && ImmAlign.value().value() < 8) + MaxDisplacement = (int)ImmAlign.value().value() - 1; + + // If addis also contributes to TOC relocation, it also needs to be updated. + bool UpdateHaBase = false; + SDValue HaBase = MemBase.getOperand(0); + int64_t Offset = MemOffset->getSExtValue(); + + // Some additional flags need to be carried over to the new memop. + PPCII::TOF NewOpFlags = PPCII::MO_NO_FLAG; + if (BaseOpc == PPC::ADDIdtprelL) + NewOpFlags = PPCII::MO_DTPREL_LO; + else if (BaseOpc == PPC::ADDItlsldL) + NewOpFlags = PPCII::MO_TLSLD_LO; + else if (BaseOpc == PPC::ADDItocL8) + NewOpFlags = PPCII::MO_TOC_LO; + + if (NewOpFlags) { + if (Offset < 0 || Offset > MaxDisplacement) { + // Check base opcode and its uses, quit if it has multiple uses. + if (MemBase.getMachineOpcode() != PPC::ADDItocL8 || !MemBase.hasOneUse() || + !HaBase.isMachineOpcode() || !HaBase.hasOneUse() || + HaBase.getMachineOpcode() != PPC::ADDIStocHA8 || + HaBase.getOperand(1) != ImmOp) + return; + UpdateHaBase = true; } - SDValue ImmOpnd = Base.getOperand(1); - - // On PPC64, the TOC base pointer is guaranteed by the ABI only to have - // 8-byte alignment, and so we can only use offsets less than 8 (otherwise, - // we might have needed different @ha relocation values for the offset - // pointers). - int MaxDisplacement = 7; - if (GlobalAddressSDNode *GA = dyn_cast(ImmOpnd)) { - const GlobalValue *GV = GA->getGlobal(); - Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout()); - MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement); + if (const auto *GA = dyn_cast(ImmOp)) { + // We can't perform this optimization for data whose alignment is + // insufficient for the instruction encoding.
+ if (ImmAlign && ImmAlign.value() < Align(4) && + (ExtraAlign || (Offset % 4) != 0)) + return; + ImmOp = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(ImmOp), + MVT::i64, Offset, NewOpFlags); + } else if (const auto *CP = dyn_cast(ImmOp)) { + ImmOp = DAG->getTargetConstantPool(CP->getConstVal(), MVT::i64, + CP->getAlign(), Offset, NewOpFlags); } + } else { + if (ImmAlign && ExtraAlign && ImmAlign.value() < ExtraAlign.value()) + return; + if (auto *C = dyn_cast(ImmOp)) { + Offset += C->getSExtValue(); - bool UpdateHBase = false; - SDValue HBase = Base.getOperand(0); - - int Offset = N->getConstantOperandVal(FirstOp); - if (ReplaceFlags) { - if (Offset < 0 || Offset > MaxDisplacement) { - // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only - // one use, then we can do this for any offset, we just need to also - // update the offset (i.e. the symbol addend) on the addis also. - if (Base.getMachineOpcode() != PPC::ADDItocL8) - continue; - - if (!HBase.isMachineOpcode() || - HBase.getMachineOpcode() != PPC::ADDIStocHA8) - continue; - - if (!Base.hasOneUse() || !HBase.hasOneUse()) - continue; - - SDValue HImmOpnd = HBase.getOperand(1); - if (HImmOpnd != ImmOpnd) - continue; - - UpdateHBase = true; - } - } else { - // Global addresses can be folded, but only if they are sufficiently - // aligned. - if (RequiresMod4Offset) { - if (GlobalAddressSDNode *GA = - dyn_cast(ImmOpnd)) { - const GlobalValue *GV = GA->getGlobal(); - Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout()); - if (Alignment < 4) - continue; - } + if ((Offset % ExtraAlign.valueOrOne().value()) != 0 || !isInt<16>(Offset)) + return; + ImmOp = + DAG->getTargetConstant(Offset, SDLoc(ImmOp), ImmOp.getValueType()); + } else if (Offset != 0) { + // This optimization is performed for non-TOC-based local-exec accesses. 
+ if (isEligibleToFoldADDIForLocalExecAccesses(DAG, MemBase)) { + // Add the non-zero offset information into the load or store + // instruction to be used for non-TOC-based local-exec accesses. + GlobalAddressSDNode *GA = dyn_cast(ImmOp); + assert(GA && "Expecting a valid GlobalAddressSDNode when folding " + "addi into local-exec accesses!"); + ImmOp = DAG->getTargetGlobalAddress( + GA->getGlobal(), SDLoc(GA), MVT::i64, Offset, GA->getTargetFlags()); + } else { + return; } + } + } - // If we're directly folding the addend from an addi instruction, then: - // 1. In general, the offset on the memory access must be zero. - // 2. If the addend is a constant, then it can be combined with a - // non-zero offset, but only if the result meets the encoding - // requirements. - if (auto *C = dyn_cast(ImmOpnd)) { - Offset += C->getSExtValue(); + LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); + LLVM_DEBUG(MemBase->dump(DAG)); + LLVM_DEBUG(dbgs() << "\nN: "); + LLVM_DEBUG(N->dump(DAG)); + LLVM_DEBUG(dbgs() << "\n"); - if (RequiresMod4Offset && (Offset % 4) != 0) - continue; + if (IsLoad) + (void)DAG->UpdateNodeOperands(N, ImmOp, RegOp, N->getOperand(2)); + else + (void)DAG->UpdateNodeOperands(N, N->getOperand(0), ImmOp, RegOp, + N->getOperand(3)); - if (!isInt<16>(Offset)) - continue; + if (UpdateHaBase) + (void)DAG->UpdateNodeOperands(HaBase.getNode(), HaBase.getOperand(0), + ImmOp); - ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd), - ImmOpnd.getValueType()); - } else if (Offset != 0) { - // This optimization is performed for non-TOC-based local-exec accesses. - if (isEligibleToFoldADDIForLocalExecAccesses(CurDAG, Base)) { - // Add the non-zero offset information into the load or store - // instruction to be used for non-TOC-based local-exec accesses. 
- GlobalAddressSDNode *GA = dyn_cast(ImmOpnd); - assert(GA && "Expecting a valid GlobalAddressSDNode when folding " - "addi into local-exec accesses!"); - ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), - MVT::i64, Offset, - GA->getTargetFlags()); - } else - continue; - } - } + if (MemBase.getNode()->use_empty()) + DAG->RemoveDeadNode(MemBase.getNode()); +} - // We found an opportunity. Reverse the operands from the add - // immediate and substitute them into the load or store. If - // needed, update the target flags for the immediate operand to - // reflect the necessary relocation information. - LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); - LLVM_DEBUG(Base->dump(CurDAG)); - LLVM_DEBUG(dbgs() << "\nN: "); - LLVM_DEBUG(N->dump(CurDAG)); - LLVM_DEBUG(dbgs() << "\n"); +void PPCDAGToDAGISel::PeepholePPC64() { + SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); - // If the relocation information isn't already present on the - // immediate operand, add it now. - if (ReplaceFlags) { - if (GlobalAddressSDNode *GA = dyn_cast(ImmOpnd)) { - SDLoc dl(GA); - const GlobalValue *GV = GA->getGlobal(); - Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout()); - // We can't perform this optimization for data whose alignment - // is insufficient for the instruction encoding. - if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) { - LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n"); - continue; - } - ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags); - } else if (ConstantPoolSDNode *CP = - dyn_cast(ImmOpnd)) { - const Constant *C = CP->getConstVal(); - ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(), - Offset, Flags); - } - } + while (Position != CurDAG->allnodes_begin()) { + SDNode *N = &*--Position; + // Skip dead nodes and any non-machine opcodes. 
+ if (N->use_empty() || !N->isMachineOpcode()) + continue; - if (FirstOp == 1) // Store - (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, - Base.getOperand(0), N->getOperand(3)); - else // Load - (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0), - N->getOperand(2)); + if (isVSXSwap(SDValue(N, 0))) + reduceVSXSwap(N, CurDAG); - if (UpdateHBase) - (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0), - ImmOpnd); + // This optimization is performed for non-TOC-based local-exec accesses. + foldADDIForLocalExecAccesses(N, CurDAG); - // The add-immediate may now be dead, in which case remove it. - if (Base.getNode()->use_empty()) - CurDAG->RemoveDeadNode(Base.getNode()); + peepholeMemOffset(N, CurDAG, Subtarget); } } diff --git a/llvm/test/CodeGen/PowerPC/toc-data-common.ll b/llvm/test/CodeGen/PowerPC/toc-data-common.ll index 7747f2eecc935..aa9db48c9937f 100644 --- a/llvm/test/CodeGen/PowerPC/toc-data-common.ll +++ b/llvm/test/CodeGen/PowerPC/toc-data-common.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s -DINSTR=lwz --check-prefix=CHECK -; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s -DINSTR=ld --check-prefix=CHECK +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s -DINSTR=lwz --check-prefixes=CHECK32,CHECK +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s -DINSTR=ld --check-prefixes=CHECK64,CHECK ; RUN: llc -filetype=obj -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s -o %t32.o ; RUN: llvm-objdump -t --symbol-description %t32.o | FileCheck %s --check-prefix=OBJ32 @@ -14,17 +14,27 @@ @a4 = global i32 0, align 4 define void @set(i32 noundef %_a) { -; CHECK-LABEL: set: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: la 4, a2[TD](2) -; CHECK-NEXT: la 5, a1[TD](2) -; CHECK-NEXT: stw 
3, 0(4) -; CHECK-NEXT: [[INSTR]] 4, L..C0(2) # @a4 -; CHECK-NEXT: stw 3, 0(5) -; CHECK-NEXT: [[INSTR]] 5, L..C1(2) # @a3 -; CHECK-NEXT: stw 3, 0(4) -; CHECK-NEXT: stw 3, 0(5) -; CHECK-NEXT: blr +; CHECK32-LABEL: set: +; CHECK32: # %bb.0: # %entry +; CHECK32-NEXT: la 4, a2[TD](2) +; CHECK32-NEXT: la 5, a1[TD](2) +; CHECK32-NEXT: stw 3, 0(4) +; CHECK32-NEXT: lwz 4, L..C0(2) # @a4 +; CHECK32-NEXT: stw 3, 0(5) +; CHECK32-NEXT: lwz 5, L..C1(2) # @a3 +; CHECK32-NEXT: stw 3, 0(4) +; CHECK32-NEXT: stw 3, 0(5) +; CHECK32-NEXT: blr +; +; CHECK64-LABEL: set: +; CHECK64: # %bb.0: # %entry +; CHECK64-NEXT: ld 4, L..C0(2) # @a4 +; CHECK64-NEXT: stw 3, a2[TD](2) +; CHECK64-NEXT: stw 3, a1[TD](2) +; CHECK64-NEXT: stw 3, 0(4) +; CHECK64-NEXT: ld 4, L..C1(2) # @a3 +; CHECK64-NEXT: stw 3, 0(4) +; CHECK64-NEXT: blr entry: store i32 %_a, ptr @a2, align 4 store i32 %_a, ptr @a1, align 4 @@ -34,81 +44,123 @@ ret void } define i32 @get1() { -; CHECK-LABEL: get1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: la 3, a2[TD](2) -; CHECK-NEXT: lwz 3, 0(3) -; CHECK-NEXT: blr +; CHECK32-LABEL: get1: +; CHECK32: # %bb.0: # %entry +; CHECK32-NEXT: la 3, a2[TD](2) +; CHECK32-NEXT: lwz 3, 0(3) +; CHECK32-NEXT: blr +; +; CHECK64-LABEL: get1: +; CHECK64: # %bb.0: # %entry +; CHECK64-NEXT: lwz 3, a2[TD](2) +; CHECK64-NEXT: blr entry: %0 = load i32, ptr @a2, align 4 ret i32 %0 } define i32 @get2() { -; CHECK-LABEL: get2: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: la 3, a1[TD](2) -; CHECK-NEXT: lwz 3, 0(3) -; CHECK-NEXT: blr +; CHECK32-LABEL: get2: +; CHECK32: # %bb.0: # %entry +; CHECK32-NEXT: la 3, a1[TD](2) +; CHECK32-NEXT: lwz 3, 0(3) +; CHECK32-NEXT: blr +; +; CHECK64-LABEL: get2: +; CHECK64: # %bb.0: # %entry +; CHECK64-NEXT: lwz 3, a1[TD](2) +; CHECK64-NEXT: blr entry: %0 = load i32, ptr @a1, align 4 ret i32 %0 } define i32 @get3() { -; CHECK-LABEL: get3: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: [[INSTR]] 3, L..C0(2) # @a4 -; CHECK-NEXT: lwz 3, 0(3) -; CHECK-NEXT: blr +; CHECK32-LABEL: get3: +; 
CHECK32: # %bb.0: # %entry +; CHECK32-NEXT: lwz 3, L..C0(2) # @a4 +; CHECK32-NEXT: lwz 3, 0(3) +; CHECK32-NEXT: blr +; +; CHECK64-LABEL: get3: +; CHECK64: # %bb.0: # %entry +; CHECK64-NEXT: ld 3, L..C0(2) # @a4 +; CHECK64-NEXT: lwz 3, 0(3) +; CHECK64-NEXT: blr entry: %0 = load i32, ptr @a4, align 4 ret i32 %0 } define i32 @get4() { -; CHECK-LABEL: get4: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: [[INSTR]] 3, L..C1(2) # @a3 -; CHECK-NEXT: lwz 3, 0(3) -; CHECK-NEXT: blr +; CHECK32-LABEL: get4: +; CHECK32: # %bb.0: # %entry +; CHECK32-NEXT: lwz 3, L..C1(2) # @a3 +; CHECK32-NEXT: lwz 3, 0(3) +; CHECK32-NEXT: blr +; +; CHECK64-LABEL: get4: +; CHECK64: # %bb.0: # %entry +; CHECK64-NEXT: ld 3, L..C1(2) # @a3 +; CHECK64-NEXT: lwz 3, 0(3) +; CHECK64-NEXT: blr entry: %0 = load i32, ptr @a3, align 4 ret i32 %0 } define nonnull ptr @escape1() { -; CHECK-LABEL: escape1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: la 3, a2[TD](2) -; CHECK-NEXT: blr +; CHECK32-LABEL: escape1: +; CHECK32: # %bb.0: # %entry +; CHECK32-NEXT: la 3, a2[TD](2) +; CHECK32-NEXT: blr +; +; CHECK64-LABEL: escape1: +; CHECK64: # %bb.0: # %entry +; CHECK64-NEXT: la 3, a2[TD](2) +; CHECK64-NEXT: blr entry: ret ptr @a2 } define nonnull ptr @escape2() { -; CHECK-LABEL: escape2: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: la 3, a1[TD](2) -; CHECK-NEXT: blr +; CHECK32-LABEL: escape2: +; CHECK32: # %bb.0: # %entry +; CHECK32-NEXT: la 3, a1[TD](2) +; CHECK32-NEXT: blr +; +; CHECK64-LABEL: escape2: +; CHECK64: # %bb.0: # %entry +; CHECK64-NEXT: la 3, a1[TD](2) +; CHECK64-NEXT: blr entry: ret ptr @a1 } define nonnull ptr @escape3() { -; CHECK-LABEL: escape3: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: [[INSTR]] 3, L..C0(2) # @a4 -; CHECK-NEXT: blr +; CHECK32-LABEL: escape3: +; CHECK32: # %bb.0: # %entry +; CHECK32-NEXT: lwz 3, L..C0(2) # @a4 +; CHECK32-NEXT: blr +; +; CHECK64-LABEL: escape3: +; CHECK64: # %bb.0: # %entry +; CHECK64-NEXT: ld 3, L..C0(2) # @a4 +; CHECK64-NEXT: blr entry: ret ptr @a4 } define nonnull ptr 
@escape4() { -; CHECK-LABEL: escape4: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: [[INSTR]] 3, L..C1(2) # @a3 -; CHECK-NEXT: blr +; CHECK32-LABEL: escape4: +; CHECK32: # %bb.0: # %entry +; CHECK32-NEXT: lwz 3, L..C1(2) # @a3 +; CHECK32-NEXT: blr +; +; CHECK64-LABEL: escape4: +; CHECK64: # %bb.0: # %entry +; CHECK64-NEXT: ld 3, L..C1(2) # @a3 +; CHECK64-NEXT: blr entry: ret ptr @a3 } diff --git a/llvm/test/CodeGen/PowerPC/toc-data-const.ll b/llvm/test/CodeGen/PowerPC/toc-data-const.ll index 6972079d826e0..7a4b0acc4ebc2 100644 --- a/llvm/test/CodeGen/PowerPC/toc-data-const.ll +++ b/llvm/test/CodeGen/PowerPC/toc-data-const.ll @@ -1,5 +1,8 @@ -; RUN: llc -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s --check-prefix CHECK -; RUN: llc -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s --check-prefix CHECK +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -O0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK32,NOOPT +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -O0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK64,NOOPT + +; RUN: llc -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s --check-prefixes=CHECK,CHECK32,NOOPT +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s --check-prefixes=CHECK,CHECK64,OPT ; RUN: llc -filetype=obj -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s -o %t32.o ; RUN: llvm-readobj %t32.o --syms --relocs | FileCheck %s -D#NFA=2 --check-prefix=OBJ32 @@ -23,15 +26,19 @@ define ptr @retptr() { attributes #0 = { "toc-data" } -; CHECK: .read: -; CHECK: la 3, i1[TD](2) +; CHECK-LABEL: .read: +; NOOPT: la 3, i1[TD](2) +; NOOPT: lwz 3, 0(3) +; OPT: lwz 3, i1[TD](2) -; CHECK: .retptr: -; CHECK: la 3, i2[TD](2) +; CHECK-LABEL: .retptr: +; CHECK: la 3, i2[TD](2) +; CHECK-NEXT: blr ; CHECK-DAG: .toc ; CHECK: .extern i1[TD] -; CHECK: .csect i2[TD] +; CHECK32: .csect i2[TD],2 +; CHECK64: .csect i2[TD],3 ; OBJ32: Relocations [ ; OBJ32-NEXT: Section (index: 1) .text { @@ -111,14 +118,14 @@ attributes #0 = { "toc-data" } ; OBJ64: Relocations [ 
; OBJ64-NEXT: Section (index: 1) .text { ; OBJ64-NEXT: 0x2 R_TOC i1([[#NFA+1]]) 0xF -; OBJ64-NEXT: 0x26 R_TOC i2([[#NFA+15]]) 0xF +; OBJ64-NEXT: 0x22 R_TOC i2([[#NFA+15]]) 0xF ; OBJ64-NEXT: } ; OBJ64-NEXT: Section (index: 2) .data { -; OBJ64-NEXT: 0x48 R_POS .read([[#NFA+5]]) 0x3F -; OBJ64-NEXT: 0x50 R_POS TOC([[#NFA+13]]) 0x3F -; OBJ64-NEXT: 0x60 R_POS .retptr([[#NFA+7]]) 0x3F -; OBJ64-NEXT: 0x68 R_POS TOC([[#NFA+13]]) 0x3F -; OBJ64-NEXT: 0x78 R_POS i1([[#NFA+1]]) 0x3F +; OBJ64-NEXT: 0x40 R_POS .read([[#NFA+5]]) 0x3F +; OBJ64-NEXT: 0x48 R_POS TOC([[#NFA+13]]) 0x3F +; OBJ64-NEXT: 0x58 R_POS .retptr([[#NFA+7]]) 0x3F +; OBJ64-NEXT: 0x60 R_POS TOC([[#NFA+13]]) 0x3F +; OBJ64-NEXT: 0x70 R_POS i1([[#NFA+1]]) 0x3F ; OBJ64-NEXT: } ; OBJ64-NEXT: ] @@ -144,7 +151,7 @@ attributes #0 = { "toc-data" } ; OBJ64: Symbol { ; OBJ64: Index: [[#NFA+13]] ; OBJ64-NEXT: Name: TOC -; OBJ64-NEXT: Value (RelocatableAddress): 0x78 +; OBJ64-NEXT: Value (RelocatableAddress): 0x70 ; OBJ64-NEXT: Section: .data ; OBJ64-NEXT: Type: 0x0 ; OBJ64-NEXT: StorageClass: C_HIDEXT (0x6B) @@ -163,7 +170,7 @@ attributes #0 = { "toc-data" } ; OBJ64: Symbol { ; OBJ64: Index: [[#NFA+15]] ; OBJ64-NEXT: Name: i2 -; OBJ64-NEXT: Value (RelocatableAddress): 0x78 +; OBJ64-NEXT: Value (RelocatableAddress): 0x70 ; OBJ64-NEXT: Section: .data ; OBJ64-NEXT: Type: 0x0 ; OBJ64-NEXT: StorageClass: C_EXT (0x2) @@ -188,8 +195,8 @@ attributes #0 = { "toc-data" } ; DIS32-NEXT: 00000026: R_TOC i2 ; DIS64: 0000000000000000 <.read>: -; DIS64-NEXT: 0: 38 62 00 00 addi 3, 2, 0 +; DIS64-NEXT: 0: 80 62 00 00 lwz 3, 0(2) ; DIS64-NEXT: 0000000000000002: R_TOC i1 -; DIS64: 0000000000000024 <.retptr>: -; DIS64-NEXT: 24: 38 62 00 00 addi 3, 2, 0 -; DIS64-NEXT: 0000000000000026: R_TOC i2 +; DIS64: 0000000000000020 <.retptr>: +; DIS64-NEXT: 20: 38 62 00 00 addi 3, 2, 0 +; DIS64-NEXT: 0000000000000022: R_TOC i2 diff --git a/llvm/test/CodeGen/PowerPC/toc-data-peephole-aligment.ll b/llvm/test/CodeGen/PowerPC/toc-data-peephole-aligment.ll new 
file mode 100644 index 0000000000000..8ec5d9fd33175 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/toc-data-peephole-aligment.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s + +@underaligned = dso_local global i32 123, align 1 #0 + +define i64 @read() { +entry: + %0 = load i32, ptr @underaligned, align 1 + %1 = sext i32 %0 to i64 + ret i64 %1 +} + +attributes #0 = { "toc-data" } + +; CHECK-LABEL: .read +; CHECK: la [[DEF:[0-9]+]], underaligned[TD](2) +; CHECK: lwa {{[0-9]+}}, 0([[DEF]]) diff --git a/llvm/test/CodeGen/PowerPC/toc-data-struct-array.ll b/llvm/test/CodeGen/PowerPC/toc-data-struct-array.ll index a5c9a8b909d1c..078e14fb5f0cb 100644 --- a/llvm/test/CodeGen/PowerPC/toc-data-struct-array.ll +++ b/llvm/test/CodeGen/PowerPC/toc-data-struct-array.ll @@ -74,7 +74,7 @@ attributes #1 = { noinline } ; OBJ64: Symbol { ; OBJ64: Name: a -; OBJ64-NEXT: Value (RelocatableAddress): 0x48 +; OBJ64-NEXT: Value (RelocatableAddress): 0x40 ; OBJ64-NEXT: Section: .data ; OBJ64-NEXT: Type: 0x0 ; OBJ64-NEXT: StorageClass: C_EXT (0x2) @@ -92,7 +92,7 @@ attributes #1 = { noinline } ; OBJ64-NEXT: } ; OBJ64-NEXT: Symbol { ; OBJ64: Name: b -; OBJ64-NEXT: Value (RelocatableAddress): 0x4C +; OBJ64-NEXT: Value (RelocatableAddress): 0x44 ; OBJ64-NEXT: Section: .data ; OBJ64-NEXT: Type: 0x0 ; OBJ64-NEXT: StorageClass: C_EXT (0x2) diff --git a/llvm/test/CodeGen/PowerPC/toc-data.ll b/llvm/test/CodeGen/PowerPC/toc-data.ll index cbf3be9fcaad0..a8a7b5d4d386f 100644 --- a/llvm/test/CodeGen/PowerPC/toc-data.ll +++ b/llvm/test/CodeGen/PowerPC/toc-data.ll @@ -3,14 +3,14 @@ ; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s \ ; RUN: -stop-before=ppc-vsx-copy | FileCheck %s --check-prefix CHECK64 ; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefix TEST32 -; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefix TEST64 +; RUN: llc
-mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s | FileCheck %s --check-prefixes=TEST64,ASMOPT64 ; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs < %s \ ; RUN: -stop-before=ppc-vsx-copy -O0 | FileCheck %s --check-prefix CHECK32 ; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs < %s \ ; RUN: -stop-before=ppc-vsx-copy -O0 | FileCheck %s --check-prefix CHECK64-NOOPT ; RUN: llc -mtriple powerpc-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefix TEST32 -; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefix TEST64 +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -verify-machineinstrs -O0 < %s | FileCheck %s --check-prefixes=TEST64,ASMNOOPT64 @i = dso_local global i32 0, align 4 #0 @d = dso_local local_unnamed_addr global double 3.141590e+00, align 8 @@ -32,8 +32,7 @@ define dso_local void @write_int(i32 signext %in) { ; TEST32-NEXT: stw 3, 0(4) ; CHECK64: name: write_int -; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @i, $x2 -; CHECK64-NEXT: STW8 %{{[0-9]+}}, 0, killed %[[SCRATCH]] :: (store (s32) into @i) +; CHECK64: STW8 %{{[0-9]+}}, @i, $x2 :: (store (s32) into @i) ; CHECK64-NOOPT: name: write_int ; CHECK64-NOOPT: %[[SUBREG:[0-9]+]]:gprc = COPY %{{[0-9]}}.sub_32 @@ -41,8 +40,9 @@ define dso_local void @write_int(i32 signext %in) { ; CHECK64-NOOPT: STW %[[SUBREG]], 0, killed %[[ADDR]] :: (store (s32) into @i) ; TEST64: .write_int: -; TEST64: la 4, i[TD](2) -; TEST64-NEXT: stw 3, 0(4) +; ASMNOOPT64: la 4, i[TD](2) +; ASMNOOPT64-NEXT: stw 3, 0(4) +; ASMOPT64: stw 3, i[TD](2) define dso_local i64 @read_ll() { @@ -85,16 +85,16 @@ define dso_local float @read_float() { ; TEST32-NEXT: lfs 1, 0(3) ; CHECK64: name: read_float -; CHECK64: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @f, $x2 -; CHECK64: %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]] :: (dereferenceable load (s32) from @f) +; CHECK64: %{{[0-9]+}}:f4rc = LFS @f, $x2 :: 
(dereferenceable load (s32) from @f) ; CHECK64-NOOPT: name: read_float ; CHECK64-NOOPT: %[[SCRATCH:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDItoc8 @f, $x2 ; CHECK64-NOOPT: %{{[0-9]+}}:f4rc = LFS 0, killed %[[SCRATCH]] -; TEST64: .read_float: -; TEST64: la 3, f[TD](2) -; TEST64-NEXT: lfs 1, 0(3) +; TEST64: .read_float: +; ASMNOOPT64: la 3, f[TD](2) +; ASMNOOPT64-NEXT: lfs 1, 0(3) +; ASMOPT64: lfs 1, f[TD](2) define dso_local void @write_double(double %in) {