diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp index 5640e16f1f70ca..52847876688b41 100644 --- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp @@ -347,20 +347,23 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); // Insert new MBBs. MF->insert(++MBB.getIterator(), LoopHeadMBB); MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB); MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB); - MF->insert(++LoopTailMBB->getIterator(), DoneMBB); + MF->insert(++LoopTailMBB->getIterator(), TailMBB); + MF->insert(++TailMBB->getIterator(), DoneMBB); // Set up successors and transfer remaining instructions to DoneMBB. LoopHeadMBB->addSuccessor(LoopIfBodyMBB); LoopHeadMBB->addSuccessor(LoopTailMBB); LoopIfBodyMBB->addSuccessor(LoopTailMBB); LoopTailMBB->addSuccessor(LoopHeadMBB); - LoopTailMBB->addSuccessor(DoneMBB); + LoopTailMBB->addSuccessor(TailMBB); + TailMBB->addSuccessor(DoneMBB); DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end()); DoneMBB->transferSuccessors(&MBB); MBB.addSuccessor(LoopHeadMBB); @@ -427,7 +430,10 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ)) .addReg(Scratch1Reg) .addMBB(LoopHeadMBB); - BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700); + + // .tail: + // dbar 0x700 + BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700); NextMBBI = MBB.end(); MI.eraseFromParent(); @@ -436,6 +442,7 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp( computeAndAddLiveIns(LiveRegs, *LoopHeadMBB); computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB); computeAndAddLiveIns(LiveRegs, *LoopTailMBB); + computeAndAddLiveIns(LiveRegs, *TailMBB); computeAndAddLiveIns(LiveRegs, *DoneMBB); return true; diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll index fcbed0edb3d16f..e84ebd94fa105a 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s | \ +; RUN: FileCheck %s --check-prefix=LA64 ;; TODO: Testing for LA32 architecture will be added later @@ -28,8 +29,9 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { ; LA64-NEXT: .LBB0_3: # in Loop: Header=BB0_1 Depth=1 ; LA64-NEXT: sc.w $a5, $a2, 0 ; LA64-NEXT: beqz $a5, .LBB0_1 -; LA64-NEXT: dbar 1792 ; LA64-NEXT: # %bb.4: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: # %bb.5: ; LA64-NEXT: srl.w $a0, $a4, $a0 ; LA64-NEXT: ret %1 = atomicrmw umax ptr %a, i8 %b acquire @@ -62,8 +64,9 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; LA64-NEXT: .LBB1_3: # in Loop: Header=BB1_1 Depth=1 ; LA64-NEXT: sc.w $a5, $a2, 0 ; LA64-NEXT: beqz $a5, .LBB1_1 -; LA64-NEXT: dbar 1792 ; LA64-NEXT: # %bb.4: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: # %bb.5: ; LA64-NEXT: srl.w $a0, $a4, $a0 ; LA64-NEXT: ret %1 = atomicrmw umax ptr %a, i16 %b acquire @@ -115,8 +118,9 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { ; LA64-NEXT: .LBB4_3: # in Loop: Header=BB4_1 Depth=1 ; LA64-NEXT: sc.w $a5, $a2, 0 ; LA64-NEXT: beqz $a5, .LBB4_1 -; LA64-NEXT: dbar 1792 ; LA64-NEXT: # %bb.4: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: # %bb.5: ; LA64-NEXT: srl.w $a0, $a4, $a0 ; LA64-NEXT: ret %1 = atomicrmw umin ptr %a, i8 %b acquire @@ -149,8 +153,9 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; LA64-NEXT: .LBB5_3: # in Loop: Header=BB5_1 Depth=1 ; LA64-NEXT: sc.w $a5, $a2, 0 ; LA64-NEXT: beqz $a5, .LBB5_1 -; LA64-NEXT: dbar 1792 ; LA64-NEXT: # %bb.4: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: # %bb.5: ; LA64-NEXT: srl.w $a0, $a4, $a0 ; LA64-NEXT: ret %1 = atomicrmw umin ptr %a, i16 %b acquire