From 3d152045cbff62643ff038ab6c9f52acf288d444 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt
Date: Wed, 24 Sep 2025 10:45:35 -0700
Subject: [PATCH] [AArch64LoadStoreOpt] Allow monotonic atomics to be paired

Right now we only allow unordered atomics to be paired, which results in
std::memory_order_relaxed accesses being unpairable. We don't need to
worry about same address ordering rules because ldp/stp always access
different addresses, so we can allow monotonic accesses to pair as well.
---
 llvm/include/llvm/CodeGen/MachineInstr.h      |  3 +++
 llvm/include/llvm/CodeGen/MachineMemOperand.h | 10 ++++++++++
 llvm/lib/CodeGen/MachineInstr.cpp             | 18 ++++++++++++++++++
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  |  2 +-
 .../AArch64/AArch64LoadStoreOptimizer.cpp     |  6 +++---
 llvm/test/CodeGen/AArch64/ldst-opt.ll         | 14 ++++++++++++++
 6 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 63d74047d2e83..274804ab452ad 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -1774,6 +1774,9 @@ class MachineInstr
   /// ordered or volatile memory references.
   LLVM_ABI bool hasOrderedMemoryRef() const;
 
+  /// Like hasOrderedMemoryRef, but allows for same-address ordering.
+  LLVM_ABI bool hasDifferentAddressOrderedMemoryRef() const;
+
   /// Return true if this load instruction never traps and points to a memory
   /// location whose value doesn't change during the execution of this function.
   ///
diff --git a/llvm/include/llvm/CodeGen/MachineMemOperand.h b/llvm/include/llvm/CodeGen/MachineMemOperand.h
index a297d3d8f8498..f2902c13fcffd 100644
--- a/llvm/include/llvm/CodeGen/MachineMemOperand.h
+++ b/llvm/include/llvm/CodeGen/MachineMemOperand.h
@@ -318,6 +318,16 @@ class MachineMemOperand {
            !isVolatile();
   }
 
+  // Return true if the only ordering constraint on this operation is
+  // same-address ordering -- basically the same as isUnordered(), but allow
+  // Monotonic as well.
+  bool isDifferentAddressUnordered() const {
+    return (getSuccessOrdering() == AtomicOrdering::NotAtomic ||
+            getSuccessOrdering() == AtomicOrdering::Unordered ||
+            getSuccessOrdering() == AtomicOrdering::Monotonic) &&
+           !isVolatile();
+  }
+
   /// Update this MachineMemOperand to reflect the alignment of MMO, if it has a
   /// greater alignment. This must only be used when the new alignment applies
   /// to all users of this MachineMemOperand.
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 2c06c5ad4a5e4..c72f60113168a 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1587,6 +1587,24 @@ bool MachineInstr::hasOrderedMemoryRef() const {
   });
 }
 
+/// hasDifferentAddressOrderedMemoryRef - Like hasOrderedMemoryRef, but allows
+/// same address orderings.
+bool MachineInstr::hasDifferentAddressOrderedMemoryRef() const {
+  // An instruction known never to access memory won't have a volatile access.
+  if (!mayStore() && !mayLoad() && !isCall() && !hasUnmodeledSideEffects())
+    return false;
+
+  // Otherwise, if the instruction has no memory reference information,
+  // conservatively assume it wasn't preserved.
+  if (memoperands_empty())
+    return true;
+
+  // Check if any of our memory operands are ordered.
+  return llvm::any_of(memoperands(), [](const MachineMemOperand *MMO) {
+    return !MMO->isDifferentAddressUnordered();
+  });
+}
+
 /// isDereferenceableInvariantLoad - Return true if this instruction will never
 /// trap and is loading from a location whose value is invariant across a run of
 /// this function.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5a51c812732e6..b4b695a1f864e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2919,7 +2919,7 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
   bool IsPreLdSt = isPreLdSt(MI);
 
   // If this is a volatile load/store, don't mess with it.
-  if (MI.hasOrderedMemoryRef())
+  if (MI.hasDifferentAddressOrderedMemoryRef())
     return false;
 
   // Make sure this is a reg/fi+imm (as opposed to an address reloc).
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index e69fa32967a79..c219aecf87170 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1631,11 +1631,11 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
                                        LdStPairFlags &Flags,
                                        const AArch64InstrInfo *TII) {
   // If this is volatile or if pairing is suppressed, not a candidate.
-  if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
+  if (MI.hasDifferentAddressOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
     return false;
 
   // We should have already checked FirstMI for pair suppression and volatility.
-  assert(!FirstMI.hasOrderedMemoryRef() &&
+  assert(!FirstMI.hasDifferentAddressOrderedMemoryRef() &&
          !TII->isLdStPairSuppressed(FirstMI) &&
          "FirstMI shouldn't get here if either of these checks are true.");
 
@@ -2744,7 +2744,7 @@ bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
     MachineBasicBlock::iterator &MBBI) {
   MachineInstr &MI = *MBBI;
   // If this is a volatile load, don't mess with it.
-  if (MI.hasOrderedMemoryRef())
+  if (MI.hasDifferentAddressOrderedMemoryRef())
     return false;
 
   if (needsWinCFI(MI.getMF()) && MI.getFlag(MachineInstr::FrameDestroy))
diff --git a/llvm/test/CodeGen/AArch64/ldst-opt.ll b/llvm/test/CodeGen/AArch64/ldst-opt.ll
index 4e09e76457582..48d502a5141ca 100644
--- a/llvm/test/CodeGen/AArch64/ldst-opt.ll
+++ b/llvm/test/CodeGen/AArch64/ldst-opt.ll
@@ -1697,3 +1697,17 @@ define void @trunc_splat(ptr %ptr) {
   store <2 x i16> , ptr %ptr, align 4
   ret void
 }
+
+; CHECK-LABEL: pair_monotonic
+; CHECK: ldp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}]
+; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}]
+define void @pair_monotonic(ptr %i, ptr %o) {
+entry:
+  %0 = load atomic i64, ptr %i monotonic, align 8
+  %hi = getelementptr inbounds nuw i8, ptr %i, i64 8
+  %1 = load atomic i64, ptr %hi monotonic, align 8
+  store atomic i64 %0, ptr %o monotonic, align 8
+  %hi5 = getelementptr inbounds nuw i8, ptr %o, i64 8
+  store atomic i64 %1, ptr %hi5 monotonic, align 8
+  ret void
+}
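
For reference, a minimal C++ sketch of the kind of source that motivates this change
(not part of the patch; the type and function names are illustrative). The relaxed
accesses below lower to monotonic atomic loads and stores, which the AArch64
load/store optimizer may now consider for ldp/stp formation; whether a pair is
actually formed still depends on the usual offset, alignment, and register checks.

  #include <atomic>
  #include <cstdint>

  // Two adjacent 8-byte fields. Relaxed (monotonic) accesses impose no
  // ordering across different addresses, so combining the two loads into an
  // ldp (and the two stores into an stp) does not change observable behavior.
  struct Counters {
    std::atomic<std::uint64_t> lo;
    std::atomic<std::uint64_t> hi;
  };

  void copy_counters(const Counters &in, Counters &out) {
    std::uint64_t a = in.lo.load(std::memory_order_relaxed);
    std::uint64_t b = in.hi.load(std::memory_order_relaxed);
    out.lo.store(a, std::memory_order_relaxed);
    out.hi.store(b, std::memory_order_relaxed);
  }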