Skip to content

Commit

Permalink
[AArch64] Fix LSE2/LSE128/RCPC3 precedence
Browse files Browse the repository at this point in the history
D142712 added tests for when both lse2 and lse128 are available, but
in practice there is no way to enable LSE128 without LSE2 from clang:
LSE128 is a v9 only feature and LSE2 has been mandatory since v8.4,
and +/-lse2 cannot be specified on the clang command line.

Therefore it makes more sense that lse2+lse128 should emit lse128
instructions, otherwise they will not be emitted at all.

It also makes sense to remove the lse128-only backend tests if that set
of attributes is never set by the frontend.

Differential Revision: https://reviews.llvm.org/D143506
  • Loading branch information
tmatheson-arm committed Feb 13, 2023
1 parent e698c59 commit 9e3010a
Show file tree
Hide file tree
Showing 15 changed files with 46 additions and 19,538 deletions.
44 changes: 41 additions & 3 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Expand Up @@ -71,6 +71,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
Expand Down Expand Up @@ -22555,6 +22556,28 @@ bool AArch64TargetLowering::isOpSuitableForLDPSTP(const Instruction *I) const {
return false;
}

/// Report whether \p I qualifies for the LSE128 path.
///
/// Always false when the subtarget does not have LSE128. Otherwise accepts:
///  - a store of a 128-bit value, aligned to at least 16 bytes, whose ordering
///    is release or sequentially consistent;
///  - an atomicrmw xchg/and/or on a 128-bit value, aligned to at least 16
///    bytes.
/// Any other instruction is rejected.
bool AArch64TargetLowering::isOpSuitableForLSE128(const Instruction *I) const {
  if (!Subtarget->hasLSE128())
    return false;

  if (const auto *Store = dyn_cast<StoreInst>(I)) {
    const bool Is128Bit =
        Store->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128;
    const bool IsAligned = Store->getAlign() >= Align(16);
    if (!Is128Bit || !IsAligned)
      return false;

    // SWPP is only used for stores where LSE2 would need a fence: unlike STP,
    // SWPP clobbers the two registers.
    switch (Store->getOrdering()) {
    case AtomicOrdering::Release:
    case AtomicOrdering::SequentiallyConsistent:
      return true;
    default:
      return false;
    }
  }

  if (const auto *AtomicOp = dyn_cast<AtomicRMWInst>(I)) {
    const bool Is128Bit =
        AtomicOp->getValOperand()->getType()->getPrimitiveSizeInBits() == 128;
    const bool IsAligned = AtomicOp->getAlign() >= Align(16);
    if (!Is128Bit || !IsAligned)
      return false;

    // Only these three read-modify-write operations are accepted.
    switch (AtomicOp->getOperation()) {
    case AtomicRMWInst::Xchg:
    case AtomicRMWInst::And:
    case AtomicRMWInst::Or:
      return true;
    default:
      return false;
    }
  }

  return false;
}

bool AArch64TargetLowering::isOpSuitableForRCPC3(const Instruction *I) const {
if (!Subtarget->hasLSE2() || !Subtarget->hasRCPC3())
return false;
Expand All @@ -22576,7 +22599,11 @@ bool AArch64TargetLowering::shouldInsertFencesForAtomic(
const Instruction *I) const {
if (isOpSuitableForRCPC3(I))
return false;
return isOpSuitableForLDPSTP(I);
if (isOpSuitableForLSE128(I))
return false;
if (isOpSuitableForLDPSTP(I))
return true;
return false;
}

bool AArch64TargetLowering::shouldInsertTrailingFenceForAtomicStore(
Expand Down Expand Up @@ -22609,7 +22636,13 @@ bool AArch64TargetLowering::shouldInsertTrailingFenceForAtomicStore(
/// Decide whether the atomic store \p SI has to be expanded in IR.
///
/// Stores that are not 128 bits wide are never expanded. For 128-bit stores
/// the feature checks are applied in a fixed precedence order:
///   1. suitable for RCPC3      -> no expansion;
///   2. suitable for LSE128     -> expand;
///   3. suitable for LDP/STP    -> no expansion;
///   4. anything else           -> expand.
///
/// \returns AtomicExpansionKind::None or AtomicExpansionKind::Expand.
TargetLoweringBase::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
  unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
  // Only 128-bit stores are candidates for expansion.
  if (Size != 128)
    return AtomicExpansionKind::None;
  if (isOpSuitableForRCPC3(SI))
    return AtomicExpansionKind::None;
  // LSE128 is checked before LDP/STP so that a store suitable for both is
  // expanded rather than left as-is.
  // NOTE(review): presumably the expansion is what lets an LSE128 instruction
  // be selected for the store - confirm against the atomic expansion pass.
  if (isOpSuitableForLSE128(SI))
    return AtomicExpansionKind::Expand;
  if (isOpSuitableForLDPSTP(SI))
    return AtomicExpansionKind::None;
  return AtomicExpansionKind::Expand;
}
Expand All @@ -22621,7 +22654,12 @@ TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
unsigned Size = LI->getType()->getPrimitiveSizeInBits();

if (Size != 128 || isOpSuitableForLDPSTP(LI) || isOpSuitableForRCPC3(LI))
if (Size != 128)
return AtomicExpansionKind::None;
if (isOpSuitableForRCPC3(LI))
return AtomicExpansionKind::None;
// No LSE128 loads
if (isOpSuitableForLDPSTP(LI))
return AtomicExpansionKind::None;

// At -O0, fast-regalloc cannot cope with the live vregs necessary to
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Expand Up @@ -713,6 +713,7 @@ class AArch64TargetLowering : public TargetLowering {
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

bool isOpSuitableForLDPSTP(const Instruction *I) const;
/// Returns true if the subtarget has LSE128 and \p I is either a 128-bit,
/// 16-byte-aligned store with release or sequentially-consistent ordering, or
/// a 128-bit, 16-byte-aligned atomicrmw xchg/and/or.
bool isOpSuitableForLSE128(const Instruction *I) const;
bool isOpSuitableForRCPC3(const Instruction *I) const;
bool shouldInsertFencesForAtomic(const Instruction *I) const override;
bool
Expand Down

0 comments on commit 9e3010a

Please sign in to comment.