Skip to content

Commit

Permalink
[PowerPC][Atomics] Remove redundant block to clear reservation (#68430)
Browse files Browse the repository at this point in the history
This PR follows what https://reviews.llvm.org/D134783 does for
quadword CAS.
  • Loading branch information
bzEq committed Oct 13, 2023
1 parent bf90ffb commit 3104681
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 58 deletions.
16 changes: 3 additions & 13 deletions llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
Expand Up @@ -239,23 +239,18 @@ bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
// loop:
// old = lqarx ptr
// <compare old, cmp>
// bne 0, fail
// bne 0, exit
// succ:
// stqcx new ptr
// bne 0, loop
// b exit
// fail:
// stqcx old ptr
// exit:
// ....
MachineFunction::iterator MFI = ++MBB.getIterator();
MachineBasicBlock *LoopCmpMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *CmpSuccMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *CmpFailMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(MFI, LoopCmpMBB);
MF->insert(MFI, CmpSuccMBB);
MF->insert(MFI, CmpFailMBB);
MF->insert(MFI, ExitMBB);
ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()),
MBB.end());
Expand All @@ -276,9 +271,9 @@ bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE)
.addReg(PPC::CR0)
.addMBB(CmpFailMBB);
.addMBB(ExitMBB);
CurrentMBB->addSuccessor(CmpSuccMBB);
CurrentMBB->addSuccessor(CmpFailMBB);
CurrentMBB->addSuccessor(ExitMBB);
// Build succ.
CurrentMBB = CmpSuccMBB;
PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo,
Expand All @@ -288,16 +283,11 @@ bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
.addImm(PPC::PRED_NE)
.addReg(PPC::CR0)
.addMBB(LoopCmpMBB);
BuildMI(CurrentMBB, DL, TII->get(PPC::B)).addMBB(ExitMBB);
CurrentMBB->addSuccessor(LoopCmpMBB);
CurrentMBB->addSuccessor(ExitMBB);
CurrentMBB = CmpFailMBB;
BuildMI(CurrentMBB, DL, SC).addReg(Old).addReg(RA).addReg(RB);
CurrentMBB->addSuccessor(ExitMBB);

recomputeLiveIns(*LoopCmpMBB);
recomputeLiveIns(*CmpSuccMBB);
recomputeLiveIns(*CmpFailMBB);
recomputeLiveIns(*ExitMBB);
NMBBI = MBB.end();
MI.eraseFromParent();
Expand Down
174 changes: 129 additions & 45 deletions llvm/test/CodeGen/PowerPC/atomics-i128.ll
Expand Up @@ -986,10 +986,7 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
; CHECK-NEXT: mr r10, r6
; CHECK-NEXT: stqcx. r10, 0, r3
; CHECK-NEXT: bne cr0, .LBB7_1
; CHECK-NEXT: b .LBB7_4
; CHECK-NEXT: .LBB7_3: # %entry
; CHECK-NEXT: stqcx. r8, 0, r3
; CHECK-NEXT: .LBB7_4: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: mr r3, r8
; CHECK-NEXT: mr r4, r9
Expand Down Expand Up @@ -1033,10 +1030,7 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
; LE-PWR8-NEXT: mr r10, r7
; LE-PWR8-NEXT: stqcx. r10, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB7_1
; LE-PWR8-NEXT: b .LBB7_4
; LE-PWR8-NEXT: .LBB7_3: # %entry
; LE-PWR8-NEXT: stqcx. r8, 0, r3
; LE-PWR8-NEXT: .LBB7_4: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: mr r3, r9
; LE-PWR8-NEXT: mr r4, r8
Expand All @@ -1057,10 +1051,7 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
; AIX64-PWR8-NEXT: mr r10, r6
; AIX64-PWR8-NEXT: stqcx. r10, 0, r3
; AIX64-PWR8-NEXT: bne cr0, L..BB7_1
; AIX64-PWR8-NEXT: b L..BB7_4
; AIX64-PWR8-NEXT: L..BB7_3: # %entry
; AIX64-PWR8-NEXT: stqcx. r8, 0, r3
; AIX64-PWR8-NEXT: L..BB7_4: # %entry
; AIX64-PWR8-NEXT: lwsync
; AIX64-PWR8-NEXT: mr r3, r8
; AIX64-PWR8-NEXT: mr r4, r9
Expand Down Expand Up @@ -1121,10 +1112,7 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) {
; CHECK-NEXT: mr r10, r6
; CHECK-NEXT: stqcx. r10, 0, r3
; CHECK-NEXT: bne cr0, .LBB8_1
; CHECK-NEXT: b .LBB8_4
; CHECK-NEXT: .LBB8_3: # %entry
; CHECK-NEXT: stqcx. r8, 0, r3
; CHECK-NEXT: .LBB8_4: # %entry
; CHECK-NEXT: mr r3, r8
; CHECK-NEXT: mr r4, r9
; CHECK-NEXT: blr
Expand Down Expand Up @@ -1168,10 +1156,7 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) {
; LE-PWR8-NEXT: mr r10, r7
; LE-PWR8-NEXT: stqcx. r10, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB8_1
; LE-PWR8-NEXT: b .LBB8_4
; LE-PWR8-NEXT: .LBB8_3: # %entry
; LE-PWR8-NEXT: stqcx. r8, 0, r3
; LE-PWR8-NEXT: .LBB8_4: # %entry
; LE-PWR8-NEXT: mr r3, r9
; LE-PWR8-NEXT: mr r4, r8
; LE-PWR8-NEXT: blr
Expand All @@ -1192,10 +1177,7 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) {
; AIX64-PWR8-NEXT: mr r10, r6
; AIX64-PWR8-NEXT: stqcx. r10, 0, r3
; AIX64-PWR8-NEXT: bne cr0, L..BB8_1
; AIX64-PWR8-NEXT: b L..BB8_4
; AIX64-PWR8-NEXT: L..BB8_3: # %entry
; AIX64-PWR8-NEXT: stqcx. r8, 0, r3
; AIX64-PWR8-NEXT: L..BB8_4: # %entry
; AIX64-PWR8-NEXT: mr r3, r8
; AIX64-PWR8-NEXT: mr r4, r9
; AIX64-PWR8-NEXT: blr
Expand Down Expand Up @@ -1255,10 +1237,7 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) {
; CHECK-NEXT: mr r10, r6
; CHECK-NEXT: stqcx. r10, 0, r3
; CHECK-NEXT: bne cr0, .LBB9_1
; CHECK-NEXT: b .LBB9_4
; CHECK-NEXT: .LBB9_3: # %entry
; CHECK-NEXT: stqcx. r8, 0, r3
; CHECK-NEXT: .LBB9_4: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: mr r3, r8
; CHECK-NEXT: mr r4, r9
Expand Down Expand Up @@ -1303,10 +1282,7 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) {
; LE-PWR8-NEXT: mr r10, r7
; LE-PWR8-NEXT: stqcx. r10, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB9_1
; LE-PWR8-NEXT: b .LBB9_4
; LE-PWR8-NEXT: .LBB9_3: # %entry
; LE-PWR8-NEXT: stqcx. r8, 0, r3
; LE-PWR8-NEXT: .LBB9_4: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: mr r3, r9
; LE-PWR8-NEXT: mr r4, r8
Expand All @@ -1328,10 +1304,7 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) {
; AIX64-PWR8-NEXT: mr r10, r6
; AIX64-PWR8-NEXT: stqcx. r10, 0, r3
; AIX64-PWR8-NEXT: bne cr0, L..BB9_1
; AIX64-PWR8-NEXT: b L..BB9_4
; AIX64-PWR8-NEXT: L..BB9_3: # %entry
; AIX64-PWR8-NEXT: stqcx. r8, 0, r3
; AIX64-PWR8-NEXT: L..BB9_4: # %entry
; AIX64-PWR8-NEXT: lwsync
; AIX64-PWR8-NEXT: mr r3, r8
; AIX64-PWR8-NEXT: mr r4, r9
Expand Down Expand Up @@ -1392,10 +1365,7 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) {
; CHECK-NEXT: mr r10, r6
; CHECK-NEXT: stqcx. r10, 0, r3
; CHECK-NEXT: bne cr0, .LBB10_1
; CHECK-NEXT: b .LBB10_4
; CHECK-NEXT: .LBB10_3: # %entry
; CHECK-NEXT: stqcx. r8, 0, r3
; CHECK-NEXT: .LBB10_4: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: mr r3, r8
; CHECK-NEXT: mr r4, r9
Expand Down Expand Up @@ -1440,10 +1410,7 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) {
; LE-PWR8-NEXT: mr r10, r7
; LE-PWR8-NEXT: stqcx. r10, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB10_1
; LE-PWR8-NEXT: b .LBB10_4
; LE-PWR8-NEXT: .LBB10_3: # %entry
; LE-PWR8-NEXT: stqcx. r8, 0, r3
; LE-PWR8-NEXT: .LBB10_4: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: mr r3, r9
; LE-PWR8-NEXT: mr r4, r8
Expand All @@ -1465,10 +1432,7 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) {
; AIX64-PWR8-NEXT: mr r10, r6
; AIX64-PWR8-NEXT: stqcx. r10, 0, r3
; AIX64-PWR8-NEXT: bne cr0, L..BB10_1
; AIX64-PWR8-NEXT: b L..BB10_4
; AIX64-PWR8-NEXT: L..BB10_3: # %entry
; AIX64-PWR8-NEXT: stqcx. r8, 0, r3
; AIX64-PWR8-NEXT: L..BB10_4: # %entry
; AIX64-PWR8-NEXT: lwsync
; AIX64-PWR8-NEXT: mr r3, r8
; AIX64-PWR8-NEXT: mr r4, r9
Expand Down Expand Up @@ -1529,10 +1493,7 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) {
; CHECK-NEXT: mr r10, r6
; CHECK-NEXT: stqcx. r10, 0, r3
; CHECK-NEXT: bne cr0, .LBB11_1
; CHECK-NEXT: b .LBB11_4
; CHECK-NEXT: .LBB11_3: # %entry
; CHECK-NEXT: stqcx. r8, 0, r3
; CHECK-NEXT: .LBB11_4: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: xor r3, r4, r8
; CHECK-NEXT: xor r4, r5, r9
Expand Down Expand Up @@ -1578,10 +1539,7 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) {
; LE-PWR8-NEXT: mr r10, r7
; LE-PWR8-NEXT: stqcx. r10, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB11_1
; LE-PWR8-NEXT: b .LBB11_4
; LE-PWR8-NEXT: .LBB11_3: # %entry
; LE-PWR8-NEXT: stqcx. r8, 0, r3
; LE-PWR8-NEXT: .LBB11_4: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: xor r3, r5, r8
; LE-PWR8-NEXT: xor r4, r4, r9
Expand All @@ -1606,10 +1564,7 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) {
; AIX64-PWR8-NEXT: mr r10, r6
; AIX64-PWR8-NEXT: stqcx. r10, 0, r3
; AIX64-PWR8-NEXT: bne cr0, L..BB11_1
; AIX64-PWR8-NEXT: b L..BB11_4
; AIX64-PWR8-NEXT: L..BB11_3: # %entry
; AIX64-PWR8-NEXT: stqcx. r8, 0, r3
; AIX64-PWR8-NEXT: L..BB11_4: # %entry
; AIX64-PWR8-NEXT: lwsync
; AIX64-PWR8-NEXT: xor r3, r4, r8
; AIX64-PWR8-NEXT: xor r4, r5, r9
Expand Down Expand Up @@ -1651,3 +1606,132 @@ entry:
%1 = extractvalue { i128, i1 } %0, 1
ret i1 %1
}

;; Weak 128-bit cmpxchg with acquire/acquire ordering where only the i1 success
;; flag of the result is returned; the loaded i128 value is not used.
;; The assertions under the unsuffixed, LE-PWR8 and AIX64-PWR8 prefixes expect an
;; inline lqarx/stqcx. loop in which a failed compare branches straight to the
;; exit label, with no store-conditional on the failure path. The assertions
;; under the PWR7 and PPC-PWR8 prefixes expect a call to
;; __atomic_compare_exchange_16 and __atomic_compare_exchange respectively.
;; TODO: Optimize CAS at exit block when bool value is returned.
;; NOTE(review): presumably the TODO refers to the xor/xor/or/cntlzd/rldicl
;; sequence after the loop, which compares the loaded value with %cmp again to
;; produce the flag -- confirm.
define i1 @bool_cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
; CHECK-LABEL: bool_cas_weak_acquire_acquire:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: .LBB12_1: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: lqarx r8, 0, r3
; CHECK-NEXT: xor r11, r9, r5
; CHECK-NEXT: xor r10, r8, r4
; CHECK-NEXT: or. r11, r11, r10
; CHECK-NEXT: bne cr0, .LBB12_3
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: #
; CHECK-NEXT: mr r11, r7
; CHECK-NEXT: mr r10, r6
; CHECK-NEXT: stqcx. r10, 0, r3
; CHECK-NEXT: bne cr0, .LBB12_1
; CHECK-NEXT: .LBB12_3: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: xor r3, r4, r8
; CHECK-NEXT: xor r4, r5, r9
; CHECK-NEXT: or r3, r4, r3
; CHECK-NEXT: cntlzd r3, r3
; CHECK-NEXT: rldicl r3, r3, 58, 63
; CHECK-NEXT: blr
;
; PWR7-LABEL: bool_cas_weak_acquire_acquire:
; PWR7: # %bb.0: # %entry
; PWR7-NEXT: mflr r0
; PWR7-NEXT: stdu r1, -128(r1)
; PWR7-NEXT: std r0, 144(r1)
; PWR7-NEXT: .cfi_def_cfa_offset 128
; PWR7-NEXT: .cfi_offset lr, 16
; PWR7-NEXT: std r5, 120(r1)
; PWR7-NEXT: std r4, 112(r1)
; PWR7-NEXT: addi r4, r1, 112
; PWR7-NEXT: mr r5, r6
; PWR7-NEXT: mr r6, r7
; PWR7-NEXT: li r7, 2
; PWR7-NEXT: li r8, 2
; PWR7-NEXT: bl __atomic_compare_exchange_16
; PWR7-NEXT: nop
; PWR7-NEXT: addi r1, r1, 128
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
;
; LE-PWR8-LABEL: bool_cas_weak_acquire_acquire:
; LE-PWR8: # %bb.0: # %entry
; LE-PWR8-NEXT: .LBB12_1: # %entry
; LE-PWR8-NEXT: #
; LE-PWR8-NEXT: lqarx r8, 0, r3
; LE-PWR8-NEXT: xor r11, r9, r4
; LE-PWR8-NEXT: xor r10, r8, r5
; LE-PWR8-NEXT: or. r11, r11, r10
; LE-PWR8-NEXT: bne cr0, .LBB12_3
; LE-PWR8-NEXT: # %bb.2: # %entry
; LE-PWR8-NEXT: #
; LE-PWR8-NEXT: mr r11, r6
; LE-PWR8-NEXT: mr r10, r7
; LE-PWR8-NEXT: stqcx. r10, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB12_1
; LE-PWR8-NEXT: .LBB12_3: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: xor r3, r5, r8
; LE-PWR8-NEXT: xor r4, r4, r9
; LE-PWR8-NEXT: or r3, r4, r3
; LE-PWR8-NEXT: cntlzd r3, r3
; LE-PWR8-NEXT: rldicl r3, r3, 58, 63
; LE-PWR8-NEXT: blr
;
; AIX64-PWR8-LABEL: bool_cas_weak_acquire_acquire:
; AIX64-PWR8: # %bb.0: # %entry
; AIX64-PWR8-NEXT: L..BB12_1: # %entry
; AIX64-PWR8-NEXT: #
; AIX64-PWR8-NEXT: lqarx r8, 0, r3
; AIX64-PWR8-NEXT: xor r11, r9, r5
; AIX64-PWR8-NEXT: xor r10, r8, r4
; AIX64-PWR8-NEXT: or. r11, r11, r10
; AIX64-PWR8-NEXT: bne cr0, L..BB12_3
; AIX64-PWR8-NEXT: # %bb.2: # %entry
; AIX64-PWR8-NEXT: #
; AIX64-PWR8-NEXT: mr r11, r7
; AIX64-PWR8-NEXT: mr r10, r6
; AIX64-PWR8-NEXT: stqcx. r10, 0, r3
; AIX64-PWR8-NEXT: bne cr0, L..BB12_1
; AIX64-PWR8-NEXT: L..BB12_3: # %entry
; AIX64-PWR8-NEXT: lwsync
; AIX64-PWR8-NEXT: xor r3, r4, r8
; AIX64-PWR8-NEXT: xor r4, r5, r9
; AIX64-PWR8-NEXT: or r3, r4, r3
; AIX64-PWR8-NEXT: cntlzd r3, r3
; AIX64-PWR8-NEXT: rldicl r3, r3, 58, 63
; AIX64-PWR8-NEXT: blr
;
; PPC-PWR8-LABEL: bool_cas_weak_acquire_acquire:
; PPC-PWR8: # %bb.0: # %entry
; PPC-PWR8-NEXT: mflr r0
; PPC-PWR8-NEXT: stwu r1, -48(r1)
; PPC-PWR8-NEXT: stw r0, 52(r1)
; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
; PPC-PWR8-NEXT: .cfi_offset lr, 4
; PPC-PWR8-NEXT: mr r4, r3
; PPC-PWR8-NEXT: lwz r3, 60(r1)
; PPC-PWR8-NEXT: stw r8, 44(r1)
; PPC-PWR8-NEXT: stw r7, 40(r1)
; PPC-PWR8-NEXT: stw r6, 36(r1)
; PPC-PWR8-NEXT: stw r5, 32(r1)
; PPC-PWR8-NEXT: addi r5, r1, 32
; PPC-PWR8-NEXT: addi r6, r1, 16
; PPC-PWR8-NEXT: li r7, 2
; PPC-PWR8-NEXT: li r8, 2
; PPC-PWR8-NEXT: stw r10, 20(r1)
; PPC-PWR8-NEXT: stw r9, 16(r1)
; PPC-PWR8-NEXT: stw r3, 28(r1)
; PPC-PWR8-NEXT: lwz r3, 56(r1)
; PPC-PWR8-NEXT: stw r3, 24(r1)
; PPC-PWR8-NEXT: li r3, 16
; PPC-PWR8-NEXT: bl __atomic_compare_exchange
; PPC-PWR8-NEXT: lwz r0, 52(r1)
; PPC-PWR8-NEXT: addi r1, r1, 48
; PPC-PWR8-NEXT: mtlr r0
; PPC-PWR8-NEXT: blr
entry:
%0 = cmpxchg weak ptr %a, i128 %cmp, i128 %new acquire acquire
%1 = extractvalue { i128, i1 } %0, 1
ret i1 %1
}

0 comments on commit 3104681

Please sign in to comment.