[AtomicExpand] Merge cmpxchg success and failure ordering when appropriate.

If we're not emitting separate fences for the success/failure cases, we
need to pass the merged ordering to the target so it can emit the
correct instructions.
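
For illustration, the merge rule boils down to the following standalone
sketch (not part of the patch; the enum mirrors the relevant
llvm::AtomicOrdering values, and the failure ordering of a cmpxchg can
never carry release semantics):

    #include <cassert>

    enum class Ordering { Monotonic, Acquire, Release, AcquireRelease,
                          SequentiallyConsistent };

    // Returns an ordering at least as strong as both the success and
    // failure orderings of a cmpxchg.
    Ordering merged(Ordering Success, Ordering Failure) {
      if (Failure == Ordering::SequentiallyConsistent)
        return Ordering::SequentiallyConsistent;
      if (Failure == Ordering::Acquire) {
        if (Success == Ordering::Monotonic)
          return Ordering::Acquire;
        if (Success == Ordering::Release)
          return Ordering::AcquireRelease;
      }
      return Success; // Success already covers Failure.
    }

    int main() {
      // The PR33332 case: "cmpxchg ... release acquire". Using only the
      // success ordering (Release) drops the failure path's acquire.
      assert(merged(Ordering::Release, Ordering::Acquire) ==
             Ordering::AcquireRelease);
      // When success is already at least as strong, it is returned as-is.
      assert(merged(Ordering::SequentiallyConsistent, Ordering::Acquire) ==
             Ordering::SequentiallyConsistent);
    }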

For the PowerPC testcase, we end up with extra fences, but that seems
like an improvement over missing fences.  If someone wants to improve
that, the PowerPC backend could be taught to emit the fences after isel,
instead of depending on fences emitted by AtomicExpand.
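
Concretely, a fence-based target inserts a leading fence when the ordering
is release-or-stronger and a trailing fence when it is acquire-or-stronger.
A minimal standalone sketch of why merging matters there (the predicates
mirror llvm::isReleaseOrStronger / llvm::isAcquireOrStronger; again
illustrative, not part of the patch):

    #include <cstdio>

    enum class Ordering { Monotonic, Acquire, Release, AcquireRelease,
                          SequentiallyConsistent };

    bool isAcquireOrStronger(Ordering O) {
      return O == Ordering::Acquire || O == Ordering::AcquireRelease ||
             O == Ordering::SequentiallyConsistent;
    }

    bool isReleaseOrStronger(Ordering O) {
      return O == Ordering::Release || O == Ordering::AcquireRelease ||
             O == Ordering::SequentiallyConsistent;
    }

    int main() {
      // Old behavior for "release acquire": fences chosen from the success
      // ordering alone -- a leading lwsync but no trailing fence, so the
      // failure path's acquire semantics were silently dropped.
      Ordering Old = Ordering::Release;
      std::printf("old: leading=%d trailing=%d\n",
                  isReleaseOrStronger(Old), isAcquireOrStronger(Old));

      // New behavior: fences chosen from the merged ordering
      // (AcquireRelease) -- leading and trailing lwsync. The trailing fence
      // on the success path is the "extra fence" mentioned above, since a
      // successful release-only exchange would not need it.
      Ordering Merged = Ordering::AcquireRelease;
      std::printf("new: leading=%d trailing=%d\n",
                  isReleaseOrStronger(Merged), isAcquireOrStronger(Merged));
    }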

Fixes https://bugs.llvm.org/show_bug.cgi?id=33332.

Differential Revision: https://reviews.llvm.org/D103342
efriedma-quic committed Jun 3, 2021
1 parent 5a2aec3 commit 44cdf77
Showing 6 changed files with 87 additions and 47 deletions.
14 changes: 14 additions & 0 deletions llvm/include/llvm/IR/Instructions.h
@@ -626,6 +626,20 @@ class AtomicCmpXchgInst : public Instruction {
     setSubclassData<FailureOrderingField>(Ordering);
   }
 
+  /// Returns a single ordering which is at least as strong as both the
+  /// success and failure orderings for this cmpxchg.
+  AtomicOrdering getMergedOrdering() const {
+    if (getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
+      return AtomicOrdering::SequentiallyConsistent;
+    if (getFailureOrdering() == AtomicOrdering::Acquire) {
+      if (getSuccessOrdering() == AtomicOrdering::Monotonic)
+        return AtomicOrdering::Acquire;
+      if (getSuccessOrdering() == AtomicOrdering::Release)
+        return AtomicOrdering::AcquireRelease;
+    }
+    return getSuccessOrdering();
+  }
+
   /// Returns the synchronization scope ID of this cmpxchg instruction.
   SyncScope::ID getSyncScopeID() const {
     return SSID;
12 changes: 7 additions & 5 deletions llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -236,12 +236,13 @@ bool AtomicExpand::runOnFunction(Function &F) {
                  TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
                      TargetLoweringBase::AtomicExpansionKind::None &&
                  (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
-                  isAcquireOrStronger(CASI->getSuccessOrdering()))) {
+                  isAcquireOrStronger(CASI->getSuccessOrdering()) ||
+                  isAcquireOrStronger(CASI->getFailureOrdering()))) {
        // If a compare and swap is lowered to LL/SC, we can do smarter fence
        // insertion, with a stronger one on the success path than on the
        // failure path. As a result, fence insertion is directly done by
        // expandAtomicCmpXchg in that case.
-       FenceOrdering = CASI->getSuccessOrdering();
+       FenceOrdering = CASI->getMergedOrdering();
        CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
        CASI->setFailureOrdering(AtomicOrdering::Monotonic);
      }
@@ -1052,7 +1053,7 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
                                             "NewVal_Shifted");
   Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
       Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
-      CI->getSuccessOrdering());
+      CI->getMergedOrdering());
   Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
   Value *Res = UndefValue::get(CI->getType());
   Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
@@ -1167,8 +1168,9 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
   // should preserve the ordering.
   bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
-  AtomicOrdering MemOpOrder =
-      ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;
+  AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
+                                  ? AtomicOrdering::Monotonic
+                                  : CI->getMergedOrdering();
 
   // In implementations which use a barrier to achieve release semantics, we can
   // delay emitting this barrier until we know a store is actually going to be
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
@@ -53,9 +53,9 @@ define void @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
 
 define void @val_compare_and_swap_monotonic_seqcst(i128* %p, i128 %oldval, i128 %newval) {
 ; CHECK-LLSC-O1-LABEL: val_compare_and_swap_monotonic_seqcst:
-; CHECK-LLSC-O1: ldxp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
+; CHECK-LLSC-O1: ldaxp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
 ; [... LOTS of stuff that is generic IR unrelated to atomic operations ...]
-; CHECK-LLSC-O1: stxp {{w[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, [x0]
+; CHECK-LLSC-O1: stlxp {{w[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, [x0]
 ;
 ; CHECK-CAS-O1-LABEL: val_compare_and_swap_monotonic_seqcst:
 ; CHECK-CAS-O1: caspal x2, x3, x4, x5, [x0]
@@ -76,7 +76,7 @@ define void @val_compare_and_swap_monotonic_seqcst(i128* %p, i128 %oldval, i128
 
 define void @val_compare_and_swap_release_acquire(i128* %p, i128 %oldval, i128 %newval) {
 ; CHECK-LLSC-O1-LABEL: val_compare_and_swap_release_acquire:
-; CHECK-LLSC-O1: ldxp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
+; CHECK-LLSC-O1: ldaxp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
 ; [... LOTS of stuff that is generic IR unrelated to atomic operations ...]
 ; CHECK-LLSC-O1: stlxp {{w[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, [x0]
 ;
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -214,12 +214,12 @@ define i64 @val_compare_and_swap_64_monotonic_seqcst(i64* %p, i64 %cmp, i64 %new
 ; CHECK-NOLSE-O1:       ; %bb.0:
 ; CHECK-NOLSE-O1-NEXT:  LBB4_1: ; %cmpxchg.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O1-NEXT:    ldxr x8, [x0]
+; CHECK-NOLSE-O1-NEXT:    ldaxr x8, [x0]
 ; CHECK-NOLSE-O1-NEXT:    cmp x8, x1
 ; CHECK-NOLSE-O1-NEXT:    b.ne LBB4_4
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
 ; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB4_1 Depth=1
-; CHECK-NOLSE-O1-NEXT:    stxr w9, x2, [x0]
+; CHECK-NOLSE-O1-NEXT:    stlxr w9, x2, [x0]
 ; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB4_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.3: ; %cmpxchg.end
 ; CHECK-NOLSE-O1-NEXT:    mov x0, x8
@@ -264,7 +264,7 @@ define i64 @val_compare_and_swap_64_release_acquire(i64* %p, i64 %cmp, i64 %new)
 ; CHECK-NOLSE-O1:       ; %bb.0:
 ; CHECK-NOLSE-O1-NEXT:  LBB5_1: ; %cmpxchg.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O1-NEXT:    ldxr x8, [x0]
+; CHECK-NOLSE-O1-NEXT:    ldaxr x8, [x0]
 ; CHECK-NOLSE-O1-NEXT:    cmp x8, x1
 ; CHECK-NOLSE-O1-NEXT:    b.ne LBB5_4
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
88 changes: 56 additions & 32 deletions llvm/test/CodeGen/PowerPC/atomics-regression.ll
@@ -488,13 +488,16 @@ define void @test44(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:  .LBB44_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    bne 0, .LBB44_3
+; PPC64LE-NEXT:    bne 0, .LBB44_4
 ; PPC64LE-NEXT:  # %bb.2:
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:    b .LBB44_1
-; PPC64LE-NEXT:  .LBB44_3:
+; PPC64LE-NEXT:    bne 0, .LBB44_1
+; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    blr
+; PPC64LE-NEXT:  .LBB44_4:
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
+; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val release acquire
   ret void
@@ -706,13 +709,16 @@ define void @test54(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:  .LBB54_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    bne 0, .LBB54_3
+; PPC64LE-NEXT:    bne 0, .LBB54_4
 ; PPC64LE-NEXT:  # %bb.2:
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:    b .LBB54_1
-; PPC64LE-NEXT:  .LBB54_3:
+; PPC64LE-NEXT:    bne 0, .LBB54_1
+; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    blr
+; PPC64LE-NEXT:  .LBB54_4:
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
+; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val release acquire
   ret void
@@ -919,13 +925,16 @@ define void @test64(i32* %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-NEXT:  .LBB64_1:
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    bne 0, .LBB64_3
+; PPC64LE-NEXT:    bne 0, .LBB64_4
 ; PPC64LE-NEXT:  # %bb.2:
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:    b .LBB64_1
-; PPC64LE-NEXT:  .LBB64_3:
+; PPC64LE-NEXT:    bne 0, .LBB64_1
+; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    blr
+; PPC64LE-NEXT:  .LBB64_4:
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
+; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val release acquire
   ret void
@@ -1127,13 +1136,16 @@ define void @test74(i64* %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-NEXT:  .LBB74_1:
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpd 4, 6
-; PPC64LE-NEXT:    bne 0, .LBB74_3
+; PPC64LE-NEXT:    bne 0, .LBB74_4
 ; PPC64LE-NEXT:  # %bb.2:
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:    b .LBB74_1
-; PPC64LE-NEXT:  .LBB74_3:
+; PPC64LE-NEXT:    bne 0, .LBB74_1
+; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    blr
+; PPC64LE-NEXT:  .LBB74_4:
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
+; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val release acquire
   ret void
@@ -1340,13 +1352,16 @@ define void @test84(i8* %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:  .LBB84_1:
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    bne 0, .LBB84_3
+; PPC64LE-NEXT:    bne 0, .LBB84_4
 ; PPC64LE-NEXT:  # %bb.2:
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:    b .LBB84_1
-; PPC64LE-NEXT:  .LBB84_3:
+; PPC64LE-NEXT:    bne 0, .LBB84_1
+; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    blr
+; PPC64LE-NEXT:  .LBB84_4:
 ; PPC64LE-NEXT:    stbcx. 6, 0, 3
+; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val syncscope("singlethread") release acquire
   ret void
@@ -1558,13 +1573,16 @@ define void @test94(i16* %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:  .LBB94_1:
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    bne 0, .LBB94_3
+; PPC64LE-NEXT:    bne 0, .LBB94_4
 ; PPC64LE-NEXT:  # %bb.2:
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:    b .LBB94_1
-; PPC64LE-NEXT:  .LBB94_3:
+; PPC64LE-NEXT:    bne 0, .LBB94_1
+; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    blr
+; PPC64LE-NEXT:  .LBB94_4:
 ; PPC64LE-NEXT:    sthcx. 6, 0, 3
+; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val syncscope("singlethread") release acquire
   ret void
@@ -1771,13 +1789,16 @@ define void @test104(i32* %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-NEXT:  .LBB104_1:
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpw 4, 6
-; PPC64LE-NEXT:    bne 0, .LBB104_3
+; PPC64LE-NEXT:    bne 0, .LBB104_4
 ; PPC64LE-NEXT:  # %bb.2:
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:    b .LBB104_1
-; PPC64LE-NEXT:  .LBB104_3:
+; PPC64LE-NEXT:    bne 0, .LBB104_1
+; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    blr
+; PPC64LE-NEXT:  .LBB104_4:
 ; PPC64LE-NEXT:    stwcx. 6, 0, 3
+; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val syncscope("singlethread") release acquire
   ret void
@@ -1979,13 +2000,16 @@ define void @test114(i64* %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-NEXT:  .LBB114_1:
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpd 4, 6
-; PPC64LE-NEXT:    bne 0, .LBB114_3
+; PPC64LE-NEXT:    bne 0, .LBB114_4
 ; PPC64LE-NEXT:  # %bb.2:
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
-; PPC64LE-NEXT:    b .LBB114_1
-; PPC64LE-NEXT:  .LBB114_3:
+; PPC64LE-NEXT:    bne 0, .LBB114_1
+; PPC64LE-NEXT:  # %bb.3:
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    blr
+; PPC64LE-NEXT:  .LBB114_4:
 ; PPC64LE-NEXT:    stdcx. 6, 0, 3
+; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val syncscope("singlethread") release acquire
   ret void
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
@@ -337,7 +337,7 @@ define void @cmpxchg_i8_release_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV32IA-NEXT:    andi a2, a2, 255
 ; RV32IA-NEXT:    sll a0, a2, a0
 ; RV32IA-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a2, (a3)
+; RV32IA-NEXT:    lr.w.aq a2, (a3)
 ; RV32IA-NEXT:    and a5, a2, a4
 ; RV32IA-NEXT:    bne a5, a1, .LBB4_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB4_1 Depth=1
@@ -373,7 +373,7 @@ define void @cmpxchg_i8_release_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind {
 ; RV64IA-NEXT:    andi a2, a2, 255
 ; RV64IA-NEXT:    sllw a0, a2, a0
 ; RV64IA-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a2, (a3)
+; RV64IA-NEXT:    lr.w.aq a2, (a3)
 ; RV64IA-NEXT:    and a5, a2, a4
 ; RV64IA-NEXT:    bne a5, a1, .LBB4_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB4_1 Depth=1
@@ -1106,7 +1106,7 @@ define void @cmpxchg_i16_release_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind
 ; RV32IA-NEXT:    and a2, a2, a4
 ; RV32IA-NEXT:    sll a0, a2, a0
 ; RV32IA-NEXT:  .LBB14_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a2, (a3)
+; RV32IA-NEXT:    lr.w.aq a2, (a3)
 ; RV32IA-NEXT:    and a4, a2, a5
 ; RV32IA-NEXT:    bne a4, a1, .LBB14_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB14_1 Depth=1
@@ -1143,7 +1143,7 @@ define void @cmpxchg_i16_release_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind
 ; RV64IA-NEXT:    and a2, a2, a4
 ; RV64IA-NEXT:    sllw a0, a2, a0
 ; RV64IA-NEXT:  .LBB14_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a2, (a3)
+; RV64IA-NEXT:    lr.w.aq a2, (a3)
 ; RV64IA-NEXT:    and a4, a2, a5
 ; RV64IA-NEXT:    bne a4, a1, .LBB14_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB14_1 Depth=1
