diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
index 51df0463e23524..eb78ef065b2108 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -153,18 +153,12 @@ static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII,
   Register ScratchReg = MI.getOperand(1).getReg();
   Register AddrReg = MI.getOperand(2).getReg();
   Register IncrReg = MI.getOperand(3).getReg();
-  AtomicOrdering Ordering =
-      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
 
   // .loop:
-  //   if(Ordering != AtomicOrdering::Monotonic)
-  //     dbar 0
   //   ll.[w|d] dest, (addr)
   //   binop scratch, dest, val
   //   sc.[w|d] scratch, scratch, (addr)
   //   beqz scratch, loop
-  if (Ordering != AtomicOrdering::Monotonic)
-    BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
   BuildMI(LoopMBB, DL,
           TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
       .addReg(AddrReg)
@@ -251,12 +245,8 @@ static void doMaskedAtomicBinOpExpansion(
   Register AddrReg = MI.getOperand(2).getReg();
   Register IncrReg = MI.getOperand(3).getReg();
   Register MaskReg = MI.getOperand(4).getReg();
-  AtomicOrdering Ordering =
-      static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
 
   // .loop:
-  //   if(Ordering != AtomicOrdering::Monotonic)
-  //     dbar 0
   //   ll.w destreg, (alignedaddr)
   //   binop scratch, destreg, incr
   //   xor scratch, destreg, scratch
@@ -264,8 +254,6 @@ static void doMaskedAtomicBinOpExpansion(
   //   xor scratch, destreg, scratch
   //   sc.w scratch, scratch, (alignedaddr)
   //   beqz scratch, loop
-  if (Ordering != AtomicOrdering::Monotonic)
-    BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
   BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg)
       .addReg(AddrReg)
       .addImm(0);
@@ -372,23 +360,20 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
   auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
   auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
   auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-  auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
   auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
 
   // Insert new MBBs.
   MF->insert(++MBB.getIterator(), LoopHeadMBB);
   MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
   MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
-  MF->insert(++LoopTailMBB->getIterator(), TailMBB);
-  MF->insert(++TailMBB->getIterator(), DoneMBB);
+  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
 
   // Set up successors and transfer remaining instructions to DoneMBB.
   LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
   LoopHeadMBB->addSuccessor(LoopTailMBB);
   LoopIfBodyMBB->addSuccessor(LoopTailMBB);
   LoopTailMBB->addSuccessor(LoopHeadMBB);
-  LoopTailMBB->addSuccessor(TailMBB);
-  TailMBB->addSuccessor(DoneMBB);
+  LoopTailMBB->addSuccessor(DoneMBB);
   DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
   DoneMBB->transferSuccessors(&MBB);
   MBB.addSuccessor(LoopHeadMBB);
@@ -402,11 +387,9 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
 
   //
   // .loophead:
-  //   dbar 0
   //   ll.w destreg, (alignedaddr)
   //   and scratch2, destreg, mask
   //   move scratch1, destreg
-  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
   BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg)
       .addReg(AddrReg)
       .addImm(0);
@@ -463,7 +446,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
   // .looptail:
   //   sc.w scratch1, scratch1, (addr)
   //   beqz scratch1, loop
-  //   dbar 0x700
   BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg)
       .addReg(Scratch1Reg)
       .addReg(AddrReg)
       .addImm(0);
@@ -472,10 +454,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
       .addReg(Scratch1Reg)
       .addMBB(LoopHeadMBB);
 
-  // .tail:
-  //   dbar 0x700
-  BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
-
   NextMBBI = MBB.end();
   MI.eraseFromParent();
 
@@ -483,7 +461,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
   computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
   computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
   computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
-  computeAndAddLiveIns(LiveRegs, *TailMBB);
   computeAndAddLiveIns(LiveRegs, *DoneMBB);
 
   return true;
@@ -535,12 +512,10 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
         .addReg(CmpValReg)
         .addMBB(TailMBB);
     // .looptail:
-    //   dbar 0
     //   move scratch, newval
     //   sc.[w|d] scratch, scratch, (addr)
    //   beqz scratch, loophead
     //   b done
-    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
         .addReg(NewValReg)
         .addReg(LoongArch::R0);
@@ -573,13 +548,11 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
         .addMBB(TailMBB);
     // .looptail:
-    //   dbar 0
     //   andn scratch, dest, mask
     //   or scratch, scratch, newval
     //   sc.[w|d] scratch, scratch, (addr)
     //   beqz scratch, loophead
     //   b done
-    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::ANDN), ScratchReg)
         .addReg(DestReg)
         .addReg(MaskReg);
@@ -598,9 +571,24 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
   }
 
+  AtomicOrdering Ordering =
+      static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
+  int hint;
+
+  switch (Ordering) {
+  case AtomicOrdering::Acquire:
+  case AtomicOrdering::AcquireRelease:
+  case AtomicOrdering::SequentiallyConsistent:
+    // TODO: acquire
+    hint = 0;
+    break;
+  default:
+    hint = 0x700;
+  }
+
   // .tail:
-  //   dbar 0x700
-  BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
+  //   dbar 0x700 | acquire
+  BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint);
 
   NextMBBI = MBB.end();
   MI.eraseFromParent();
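Note for readers: the cmpxchg failure path now picks its DBAR hint from the failure ordering instead of always emitting dbar 0x700. A minimal standalone sketch of that selection, assuming only llvm/Support/AtomicOrdering.h (the helper name is invented for illustration, not part of the patch):

    #include "llvm/Support/AtomicOrdering.h"

    // Mirrors the switch added to expandAtomicCmpXchg above: acquire-or-stronger
    // failure orderings get hint 0 (a full barrier, until a dedicated acquire
    // hint is wired up per the TODO); anything weaker keeps the cheap 0x700.
    static int tailDbarHint(llvm::AtomicOrdering FailureOrdering) {
      switch (FailureOrdering) {
      case llvm::AtomicOrdering::Acquire:
      case llvm::AtomicOrdering::AcquireRelease:
      case llvm::AtomicOrdering::SequentiallyConsistent:
        return 0;
      default:
        return 0x700;
      }
    }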
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 5d4ed46025d057..8d1b018995edac 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1792,7 +1792,7 @@ def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax;
 
 class PseudoCmpXchg
     : Pseudo<(outs GPR:$res, GPR:$scratch),
-             (ins GPR:$addr, GPR:$cmpval, GPR:$newval)> {
+             (ins GPR:$addr, GPR:$cmpval, GPR:$newval, grlenimm:$ordering)> {
   let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
   let mayLoad = 1;
   let mayStore = 1;
@@ -1882,14 +1882,28 @@ def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
                 PseudoMaskedAtomicLoadUMin32>;
 
-def : Pat<(atomic_cmp_swap_64 GPR:$addr, GPR:$cmp, GPR:$new),
-          (PseudoCmpXchg64 GPR:$addr, GPR:$cmp, GPR:$new)>;
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
+                            ValueType vt = GRLenVT> {
+  def : Pat<(vt (!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
+}
+
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>;
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>;
 
 def : Pat<(int_loongarch_masked_cmpxchg_i64
             GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
           (PseudoMaskedCmpXchg32
             GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
-def : Pat<(atomic_cmp_swap_32 GPR:$addr, GPR:$cmp, GPR:$new),
-          (PseudoCmpXchg32 GPR:$addr, GPR:$cmp, GPR:$new)>;
 
 def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_max_i64,
                               PseudoMaskedAtomicLoadMax32>;
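The immediates 2, 4, 5, 6 and 7 in PseudoCmpXchgPat are the integer values of llvm::AtomicOrdering, which the expansion pass reads back out of the pseudo's $ordering operand. For cross-checking, a sketch of that enum as declared in llvm/Support/AtomicOrdering.h at the time of this patch:

    // Values must stay in sync with the immediates used by PseudoCmpXchgPat.
    enum class AtomicOrdering {
      NotAtomic = 0,
      Unordered = 1,
      Monotonic = 2,              // "_monotonic" patterns
      Consume = 3,                // not emitted by the cmpxchg patterns
      Acquire = 4,                // "_acquire"
      Release = 5,                // "_release"
      AcquireRelease = 6,         // "_acq_rel"
      SequentiallyConsistent = 7  // "_seq_cst"
    };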
diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
index fba340bed42224..d8b0fc1e095b71 100644
--- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll
@@ -33,14 +33,13 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
 ; LA64-NEXT:    bne $a5, $a2, .LBB0_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB0_3 Depth=2
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    move $a7, $a6
 ; LA64-NEXT:    sc.w $a7, $a0, 0
 ; LA64-NEXT:    beqz $a7, .LBB0_3
 ; LA64-NEXT:    b .LBB0_6
 ; LA64-NEXT:  .LBB0_5: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB0_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; LA64-NEXT:    addi.w $a6, $a2, 0
@@ -86,14 +85,13 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
 ; LA64-NEXT:    bne $a5, $a2, .LBB1_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB1_3 Depth=2
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    move $a7, $a6
 ; LA64-NEXT:    sc.w $a7, $a0, 0
 ; LA64-NEXT:    beqz $a7, .LBB1_3
 ; LA64-NEXT:    b .LBB1_6
 ; LA64-NEXT:  .LBB1_5: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB1_1 Depth=1
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB1_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB1_1 Depth=1
 ; LA64-NEXT:    addi.w $a6, $a2, 0
@@ -127,14 +125,13 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
 ; LA64-NEXT:    bne $a1, $a3, .LBB2_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB2_3 Depth=2
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    move $a6, $a5
 ; LA64-NEXT:    sc.w $a6, $a0, 0
 ; LA64-NEXT:    beqz $a6, .LBB2_3
 ; LA64-NEXT:    b .LBB2_6
 ; LA64-NEXT:  .LBB2_5: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB2_1 Depth=1
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB2_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB2_1 Depth=1
 ; LA64-NEXT:    move $a3, $a1
@@ -166,14 +163,13 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
 ; LA64-NEXT:    bne $a2, $a3, .LBB3_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB3_3 Depth=2
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    move $a5, $a4
 ; LA64-NEXT:    sc.d $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB3_3
 ; LA64-NEXT:    b .LBB3_6
 ; LA64-NEXT:  .LBB3_5: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB3_1 Depth=1
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB3_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; LA64-NEXT:    bne $a2, $a3, .LBB3_1
@@ -221,14 +217,13 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
 ; LA64-NEXT:    bne $a6, $a2, .LBB4_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB4_3 Depth=2
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    move $t0, $a7
 ; LA64-NEXT:    sc.w $t0, $a0, 0
 ; LA64-NEXT:    beqz $t0, .LBB4_3
 ; LA64-NEXT:    b .LBB4_6
 ; LA64-NEXT:  .LBB4_5: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB4_1 Depth=1
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB4_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB4_1 Depth=1
 ; LA64-NEXT:    addi.w $a7, $a2, 0
@@ -279,14 +274,13 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
 ; LA64-NEXT:    bne $a6, $a2, .LBB5_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB5_3 Depth=2
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    move $t0, $a7
 ; LA64-NEXT:    sc.w $t0, $a0, 0
 ; LA64-NEXT:    beqz $t0, .LBB5_3
 ; LA64-NEXT:    b .LBB5_6
 ; LA64-NEXT:  .LBB5_5: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB5_1 Depth=1
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB5_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB5_1 Depth=1
 ; LA64-NEXT:    addi.w $a7, $a2, 0
@@ -325,14 +319,13 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
 ; LA64-NEXT:    bne $a2, $a4, .LBB6_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB6_3 Depth=2
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    move $a7, $a6
 ; LA64-NEXT:    sc.w $a7, $a0, 0
 ; LA64-NEXT:    beqz $a7, .LBB6_3
 ; LA64-NEXT:    b .LBB6_6
 ; LA64-NEXT:  .LBB6_5: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB6_1 Depth=1
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB6_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB6_1 Depth=1
 ; LA64-NEXT:    move $a4, $a2
@@ -369,14 +362,13 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
 ; LA64-NEXT:    bne $a2, $a3, .LBB7_5
 ; LA64-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB7_3 Depth=2
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    move $a5, $a4
 ; LA64-NEXT:    sc.d $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB7_3
 ; LA64-NEXT:    b .LBB7_6
 ; LA64-NEXT:  .LBB7_5: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB7_1 Depth=1
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB7_6: # %atomicrmw.start
 ; LA64-NEXT:    # in Loop: Header=BB7_1 Depth=1
 ; LA64-NEXT:    bne $a2, $a3, .LBB7_1
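A detail that makes these CHECK lines easier to read: the assembler prints DBAR hints in decimal, so the dbar 1792 being replaced is exactly the 0x700 constant from the expansion pass. A one-line sanity check, illustrative only:

    // 0x700 is what the tests print as 1792.
    static_assert(0x700 == 1792, "DBAR hint constant, printed in decimal by llc");

These wrap tests lower through a cmpxchg loop whose failure ordering is acquire or stronger, which is why their failure tail moves from dbar 1792 to dbar 0 under the new hint selection.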
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
index 4e458e989c27e5..817bafcf0e6228 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
@@ -20,14 +20,13 @@ define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
 ; LA64-NEXT:    and $a5, $a4, $a3
 ; LA64-NEXT:    bne $a5, $a1, .LBB0_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB0_1 Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    andn $a5, $a4, $a3
 ; LA64-NEXT:    or $a5, $a5, $a2
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB0_1
 ; LA64-NEXT:    b .LBB0_4
 ; LA64-NEXT:  .LBB0_3:
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB0_4:
 ; LA64-NEXT:    ret
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire
@@ -54,14 +53,13 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind
 ; LA64-NEXT:    and $a5, $a4, $a3
 ; LA64-NEXT:    bne $a5, $a1, .LBB1_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB1_1 Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    andn $a5, $a4, $a3
 ; LA64-NEXT:    or $a5, $a5, $a2
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB1_1
 ; LA64-NEXT:    b .LBB1_4
 ; LA64-NEXT:  .LBB1_3:
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB1_4:
 ; LA64-NEXT:    ret
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire
@@ -75,13 +73,12 @@ define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind
 ; LA64-NEXT:    ll.w $a3, $a0, 0
 ; LA64-NEXT:    bne $a3, $a1, .LBB2_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB2_1 Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    move $a4, $a2
 ; LA64-NEXT:    sc.w $a4, $a0, 0
 ; LA64-NEXT:    beqz $a4, .LBB2_1
 ; LA64-NEXT:    b .LBB2_4
 ; LA64-NEXT:  .LBB2_3:
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB2_4:
 ; LA64-NEXT:    ret
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
@@ -95,13 +92,12 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
 ; LA64-NEXT:    ll.d $a3, $a0, 0
 ; LA64-NEXT:    bne $a3, $a1, .LBB3_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB3_1 Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    move $a4, $a2
 ; LA64-NEXT:    sc.d $a4, $a0, 0
 ; LA64-NEXT:    beqz $a4, .LBB3_1
 ; LA64-NEXT:    b .LBB3_4
 ; LA64-NEXT:  .LBB3_3:
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB3_4:
 ; LA64-NEXT:    ret
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire
@@ -127,14 +123,13 @@ define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind
 ; LA64-NEXT:    and $a6, $a5, $a4
 ; LA64-NEXT:    bne $a6, $a1, .LBB4_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB4_1 Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    andn $a6, $a5, $a4
 ; LA64-NEXT:    or $a6, $a6, $a2
 ; LA64-NEXT:    sc.w $a6, $a0, 0
 ; LA64-NEXT:    beqz $a6, .LBB4_1
 ; LA64-NEXT:    b .LBB4_4
 ; LA64-NEXT:  .LBB4_3:
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB4_4:
 ; LA64-NEXT:    srl.w $a0, $a5, $a3
 ; LA64-NEXT:    ret
@@ -163,14 +158,13 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou
 ; LA64-NEXT:    and $a6, $a5, $a3
 ; LA64-NEXT:    bne $a6, $a1, .LBB5_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB5_1 Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    andn $a6, $a5, $a3
 ; LA64-NEXT:    or $a6, $a6, $a2
 ; LA64-NEXT:    sc.w $a6, $a0, 0
 ; LA64-NEXT:    beqz $a6, .LBB5_1
 ; LA64-NEXT:    b .LBB5_4
 ; LA64-NEXT:  .LBB5_3:
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB5_4:
 ; LA64-NEXT:    srl.w $a0, $a5, $a4
 ; LA64-NEXT:    ret
@@ -186,13 +180,12 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou
 ; LA64-NEXT:    ll.w $a3, $a0, 0
 ; LA64-NEXT:    bne $a3, $a1, .LBB6_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB6_1 Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    move $a4, $a2
 ; LA64-NEXT:    sc.w $a4, $a0, 0
 ; LA64-NEXT:    beqz $a4, .LBB6_1
 ; LA64-NEXT:    b .LBB6_4
 ; LA64-NEXT:  .LBB6_3:
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB6_4:
 ; LA64-NEXT:    move $a0, $a3
 ; LA64-NEXT:    ret
@@ -208,13 +201,12 @@ define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nou
 ; LA64-NEXT:    ll.d $a3, $a0, 0
 ; LA64-NEXT:    bne $a3, $a1, .LBB7_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB7_1 Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    move $a4, $a2
 ; LA64-NEXT:    sc.d $a4, $a0, 0
 ; LA64-NEXT:    beqz $a4, .LBB7_1
 ; LA64-NEXT:    b .LBB7_4
 ; LA64-NEXT:  .LBB7_3:
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB7_4:
 ; LA64-NEXT:    move $a0, $a3
 ; LA64-NEXT:    ret
@@ -242,14 +234,13 @@ define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind
 ; LA64-NEXT:    and $a6, $a5, $a3
 ; LA64-NEXT:    bne $a6, $a1, .LBB8_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB8_1 Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    andn $a6, $a5, $a3
 ; LA64-NEXT:    or $a6, $a6, $a2
 ; LA64-NEXT:    sc.w $a6, $a0, 0
 ; LA64-NEXT:    beqz $a6, .LBB8_1
 ; LA64-NEXT:    b .LBB8_4
 ; LA64-NEXT:  .LBB8_3:
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB8_4:
 ; LA64-NEXT:    and $a0, $a5, $a4
 ; LA64-NEXT:    addi.w $a0, $a0, 0
@@ -281,14 +272,13 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw
 ; LA64-NEXT:    and $a6, $a5, $a4
 ; LA64-NEXT:    bne $a6, $a1, .LBB9_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB9_1 Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    andn $a6, $a5, $a4
 ; LA64-NEXT:    or $a6, $a6, $a2
 ; LA64-NEXT:    sc.w $a6, $a0, 0
 ; LA64-NEXT:    beqz $a6, .LBB9_1
 ; LA64-NEXT:    b .LBB9_4
 ; LA64-NEXT:  .LBB9_3:
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB9_4:
 ; LA64-NEXT:    and $a0, $a5, $a3
 ; LA64-NEXT:    addi.w $a0, $a0, 0
@@ -307,13 +297,12 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw
 ; LA64-NEXT:    ll.w $a3, $a0, 0
 ; LA64-NEXT:    bne $a3, $a1, .LBB10_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB10_1 Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    move $a4, $a2
 ; LA64-NEXT:    sc.w $a4, $a0, 0
 ; LA64-NEXT:    beqz $a4, .LBB10_1
 ; LA64-NEXT:    b .LBB10_4
 ; LA64-NEXT:  .LBB10_3:
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB10_4:
 ; LA64-NEXT:    addi.w $a0, $a1, 0
 ; LA64-NEXT:    xor $a0, $a3, $a0
@@ -331,13 +320,12 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw
 ; LA64-NEXT:    ll.d $a3, $a0, 0
 ; LA64-NEXT:    bne $a3, $a1, .LBB11_3
 ; LA64-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    move $a4, $a2
 ; LA64-NEXT:    sc.d $a4, $a0, 0
 ; LA64-NEXT:    beqz $a4, .LBB11_1
 ; LA64-NEXT:    b .LBB11_4
 ; LA64-NEXT:  .LBB11_3:
-; LA64-NEXT:    dbar 1792
+; LA64-NEXT:    dbar 0
 ; LA64-NEXT:  .LBB11_4:
 ; LA64-NEXT:    xor $a0, $a3, $a1
 ; LA64-NEXT:    sltui $a0, $a0, 1
@@ -346,3 +334,337 @@ define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounw
   %res = extractvalue { i64, i1 } %tmp, 1
   ret i1 %res
 }
+
+define void @cmpxchg_i8_monotonic_monotonic(ptr %ptr, i8 %cmp, i8 %val) nounwind {
+; LA64-LABEL: cmpxchg_i8_monotonic_monotonic:
+; LA64:       # %bb.0:
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    andi $a2, $a2, 255
+; LA64-NEXT:    sll.w $a2, $a2, $a3
+; LA64-NEXT:    ori $a4, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a4, $a3
+; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a4, $a0, 0
+; LA64-NEXT:    and $a5, $a4, $a3
+; LA64-NEXT:    bne $a5, $a1, .LBB12_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; LA64-NEXT:    andn $a5, $a4, $a3
+; LA64-NEXT:    or $a5, $a5, $a2
+; LA64-NEXT:    sc.w $a5, $a0, 0
+; LA64-NEXT:    beqz $a5, .LBB12_1
+; LA64-NEXT:    b .LBB12_4
+; LA64-NEXT:  .LBB12_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB12_4:
+; LA64-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic
+  ret void
+}
+
+define void @cmpxchg_i16_monotonic_monotonic(ptr %ptr, i16 %cmp, i16 %val) nounwind {
+; LA64-LABEL: cmpxchg_i16_monotonic_monotonic:
+; LA64:       # %bb.0:
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
+; LA64-NEXT:    sll.w $a2, $a2, $a3
+; LA64-NEXT:    lu12i.w $a4, 15
+; LA64-NEXT:    ori $a4, $a4, 4095
+; LA64-NEXT:    sll.w $a3, $a4, $a3
+; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a4, $a0, 0
+; LA64-NEXT:    and $a5, $a4, $a3
+; LA64-NEXT:    bne $a5, $a1, .LBB13_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; LA64-NEXT:    andn $a5, $a4, $a3
+; LA64-NEXT:    or $a5, $a5, $a2
+; LA64-NEXT:    sc.w $a5, $a0, 0
+; LA64-NEXT:    beqz $a5, .LBB13_1
+; LA64-NEXT:    b .LBB13_4
+; LA64-NEXT:  .LBB13_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB13_4:
+; LA64-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic
+  ret void
+}
+
+define void @cmpxchg_i32_monotonic_monotonic(ptr %ptr, i32 %cmp, i32 %val) nounwind {
+; LA64-LABEL: cmpxchg_i32_monotonic_monotonic:
+; LA64:       # %bb.0:
+; LA64-NEXT:  .LBB14_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a3, $a0, 0
+; LA64-NEXT:    bne $a3, $a1, .LBB14_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB14_1 Depth=1
+; LA64-NEXT:    move $a4, $a2
+; LA64-NEXT:    sc.w $a4, $a0, 0
+; LA64-NEXT:    beqz $a4, .LBB14_1
+; LA64-NEXT:    b .LBB14_4
+; LA64-NEXT:  .LBB14_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB14_4:
+; LA64-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
+  ret void
+}
+
+define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwind {
+; LA64-LABEL: cmpxchg_i64_monotonic_monotonic:
+; LA64:       # %bb.0:
+; LA64-NEXT:  .LBB15_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.d $a3, $a0, 0
+; LA64-NEXT:    bne $a3, $a1, .LBB15_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB15_1 Depth=1
+; LA64-NEXT:    move $a4, $a2
+; LA64-NEXT:    sc.d $a4, $a0, 0
+; LA64-NEXT:    beqz $a4, .LBB15_1
+; LA64-NEXT:    b .LBB15_4
+; LA64-NEXT:  .LBB15_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB15_4:
+; LA64-NEXT:    ret
+  %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic
+  ret void
+}
+
+define i8 @cmpxchg_i8_monotonic_monotonic_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind {
+; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti8:
+; LA64:       # %bb.0:
+; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    ori $a4, $zero, 255
+; LA64-NEXT:    sll.w $a4, $a4, $a3
+; LA64-NEXT:    andi $a2, $a2, 255
+; LA64-NEXT:    addi.w $a4, $a4, 0
+; LA64-NEXT:    sll.w $a2, $a2, $a3
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a5, $a0, 0
+; LA64-NEXT:    and $a6, $a5, $a4
+; LA64-NEXT:    bne $a6, $a1, .LBB16_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB16_1 Depth=1
+; LA64-NEXT:    andn $a6, $a5, $a4
+; LA64-NEXT:    or $a6, $a6, $a2
+; LA64-NEXT:    sc.w $a6, $a0, 0
+; LA64-NEXT:    beqz $a6, .LBB16_1
+; LA64-NEXT:    b .LBB16_4
+; LA64-NEXT:  .LBB16_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB16_4:
+; LA64-NEXT:    srl.w $a0, $a5, $a3
+; LA64-NEXT:    ret
+  %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic
+  %res = extractvalue { i8, i1 } %tmp, 0
+  ret i8 %res
+}
+
+define i16 @cmpxchg_i16_monotonic_monotonic_reti16(ptr %ptr, i16 %cmp, i16 %val) nounwind {
+; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti16:
+; LA64:       # %bb.0:
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    slli.d $a4, $a0, 3
+; LA64-NEXT:    sll.w $a3, $a3, $a4
+; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
+; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    sll.w $a2, $a2, $a4
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a4
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a5, $a0, 0
+; LA64-NEXT:    and $a6, $a5, $a3
+; LA64-NEXT:    bne $a6, $a1, .LBB17_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB17_1 Depth=1
+; LA64-NEXT:    andn $a6, $a5, $a3
+; LA64-NEXT:    or $a6, $a6, $a2
+; LA64-NEXT:    sc.w $a6, $a0, 0
+; LA64-NEXT:    beqz $a6, .LBB17_1
+; LA64-NEXT:    b .LBB17_4
+; LA64-NEXT:  .LBB17_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB17_4:
+; LA64-NEXT:    srl.w $a0, $a5, $a4
+; LA64-NEXT:    ret
+  %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic
+  %res = extractvalue { i16, i1 } %tmp, 0
+  ret i16 %res
+}
+
+define i32 @cmpxchg_i32_monotonic_monotonic_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind {
+; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32:
+; LA64:       # %bb.0:
+; LA64-NEXT:  .LBB18_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a3, $a0, 0
+; LA64-NEXT:    bne $a3, $a1, .LBB18_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB18_1 Depth=1
+; LA64-NEXT:    move $a4, $a2
+; LA64-NEXT:    sc.w $a4, $a0, 0
+; LA64-NEXT:    beqz $a4, .LBB18_1
+; LA64-NEXT:    b .LBB18_4
+; LA64-NEXT:  .LBB18_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB18_4:
+; LA64-NEXT:    move $a0, $a3
+; LA64-NEXT:    ret
+  %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
+  %res = extractvalue { i32, i1 } %tmp, 0
+  ret i32 %res
+}
+
+define i64 @cmpxchg_i64_monotonic_monotonic_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind {
+; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti64:
+; LA64:       # %bb.0:
+; LA64-NEXT:  .LBB19_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.d $a3, $a0, 0
+; LA64-NEXT:    bne $a3, $a1, .LBB19_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB19_1 Depth=1
+; LA64-NEXT:    move $a4, $a2
+; LA64-NEXT:    sc.d $a4, $a0, 0
+; LA64-NEXT:    beqz $a4, .LBB19_1
+; LA64-NEXT:    b .LBB19_4
+; LA64-NEXT:  .LBB19_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB19_4:
+; LA64-NEXT:    move $a0, $a3
+; LA64-NEXT:    ret
+  %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic
+  %res = extractvalue { i64, i1 } %tmp, 0
+  ret i64 %res
+}
+
+define i1 @cmpxchg_i8_monotonic_monotonic_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind {
+; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti1:
+; LA64:       # %bb.0:
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    slli.d $a3, $a0, 3
+; LA64-NEXT:    ori $a4, $zero, 255
+; LA64-NEXT:    sll.w $a4, $a4, $a3
+; LA64-NEXT:    sll.w $a1, $a1, $a3
+; LA64-NEXT:    andi $a2, $a2, 255
+; LA64-NEXT:    sll.w $a2, $a2, $a3
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a3, $a4, 0
+; LA64-NEXT:  .LBB20_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a5, $a0, 0
+; LA64-NEXT:    and $a6, $a5, $a3
+; LA64-NEXT:    bne $a6, $a1, .LBB20_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB20_1 Depth=1
+; LA64-NEXT:    andn $a6, $a5, $a3
+; LA64-NEXT:    or $a6, $a6, $a2
+; LA64-NEXT:    sc.w $a6, $a0, 0
+; LA64-NEXT:    beqz $a6, .LBB20_1
+; LA64-NEXT:    b .LBB20_4
+; LA64-NEXT:  .LBB20_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB20_4:
+; LA64-NEXT:    and $a0, $a5, $a4
+; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    xor $a0, $a1, $a0
+; LA64-NEXT:    sltui $a0, $a0, 1
+; LA64-NEXT:    ret
+  %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic
+  %res = extractvalue { i8, i1 } %tmp, 1
+  ret i1 %res
+}
+
+define i1 @cmpxchg_i16_monotonic_monotonic_reti1(ptr %ptr, i16 %cmp, i16 %val) nounwind {
+; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti1:
+; LA64:       # %bb.0:
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    slli.d $a4, $a0, 3
+; LA64-NEXT:    sll.w $a3, $a3, $a4
+; LA64-NEXT:    sll.w $a1, $a1, $a4
+; LA64-NEXT:    bstrpick.d $a2, $a2, 15, 0
+; LA64-NEXT:    sll.w $a2, $a2, $a4
+; LA64-NEXT:    addi.w $a2, $a2, 0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
+; LA64-NEXT:    addi.w $a4, $a3, 0
+; LA64-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a5, $a0, 0
+; LA64-NEXT:    and $a6, $a5, $a4
+; LA64-NEXT:    bne $a6, $a1, .LBB21_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB21_1 Depth=1
+; LA64-NEXT:    andn $a6, $a5, $a4
+; LA64-NEXT:    or $a6, $a6, $a2
+; LA64-NEXT:    sc.w $a6, $a0, 0
+; LA64-NEXT:    beqz $a6, .LBB21_1
+; LA64-NEXT:    b .LBB21_4
+; LA64-NEXT:  .LBB21_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB21_4:
+; LA64-NEXT:    and $a0, $a5, $a3
+; LA64-NEXT:    addi.w $a0, $a0, 0
+; LA64-NEXT:    xor $a0, $a1, $a0
+; LA64-NEXT:    sltui $a0, $a0, 1
+; LA64-NEXT:    ret
+  %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic
+  %res = extractvalue { i16, i1 } %tmp, 1
+  ret i1 %res
+}
+
+define i1 @cmpxchg_i32_monotonic_monotonic_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind {
+; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1:
+; LA64:       # %bb.0:
+; LA64-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.w $a3, $a0, 0
+; LA64-NEXT:    bne $a3, $a1, .LBB22_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB22_1 Depth=1
+; LA64-NEXT:    move $a4, $a2
+; LA64-NEXT:    sc.w $a4, $a0, 0
+; LA64-NEXT:    beqz $a4, .LBB22_1
+; LA64-NEXT:    b .LBB22_4
+; LA64-NEXT:  .LBB22_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB22_4:
+; LA64-NEXT:    addi.w $a0, $a1, 0
+; LA64-NEXT:    xor $a0, $a3, $a0
+; LA64-NEXT:    sltui $a0, $a0, 1
+; LA64-NEXT:    ret
+  %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
+  %res = extractvalue { i32, i1 } %tmp, 1
+  ret i1 %res
+}
+
+define i1 @cmpxchg_i64_monotonic_monotonic_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind {
+; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti1:
+; LA64:       # %bb.0:
+; LA64-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    ll.d $a3, $a0, 0
+; LA64-NEXT:    bne $a3, $a1, .LBB23_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB23_1 Depth=1
+; LA64-NEXT:    move $a4, $a2
+; LA64-NEXT:    sc.d $a4, $a0, 0
+; LA64-NEXT:    beqz $a4, .LBB23_1
+; LA64-NEXT:    b .LBB23_4
+; LA64-NEXT:  .LBB23_3:
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  .LBB23_4:
+; LA64-NEXT:    xor $a0, $a3, $a1
+; LA64-NEXT:    sltui $a0, $a0, 1
+; LA64-NEXT:    ret
+  %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic
+  %res = extractvalue { i64, i1 } %tmp, 1
+  ret i1 %res
+}
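The new _monotonic tests pair with the existing _acquire ones; in source terms the two shapes correspond to compare-exchange calls with different memory orders. A hedged sketch of what a front end would emit for each (function names invented for the example):

    #include <atomic>

    // Failure order acquire -> cmpxchg ... acquire acquire: the failure tail
    // becomes "dbar 0" with this patch.
    bool cas_acquire(std::atomic<int> &v, int expected, int desired) {
      return v.compare_exchange_strong(expected, desired,
                                       std::memory_order_acquire,
                                       std::memory_order_acquire);
    }

    // Failure order relaxed -> cmpxchg ... monotonic monotonic: the failure
    // tail keeps the weaker "dbar 1792" (0x700) hint.
    bool cas_relaxed(std::atomic<int> &v, int expected, int desired) {
      return v.compare_exchange_strong(expected, desired,
                                       std::memory_order_relaxed,
                                       std::memory_order_relaxed);
    }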
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
index 9767717395b67e..9a29d67e998276 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
@@ -25,14 +25,13 @@ define float @float_fadd_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    bne $a3, $a2, .LBB0_5
 ; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64F-NEXT:    # in Loop: Header=BB0_3 Depth=2
-; LA64F-NEXT:    dbar 0
 ; LA64F-NEXT:    move $a4, $a1
 ; LA64F-NEXT:    sc.w $a4, $a0, 0
 ; LA64F-NEXT:    beqz $a4, .LBB0_3
 ; LA64F-NEXT:    b .LBB0_6
 ; LA64F-NEXT:  .LBB0_5: # %atomicrmw.start
 ; LA64F-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; LA64F-NEXT:    dbar 1792
+; LA64F-NEXT:    dbar 0
 ; LA64F-NEXT:  .LBB0_6: # %atomicrmw.start
 ; LA64F-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; LA64F-NEXT:    movgr2fr.w $fa0, $a3
@@ -61,14 +60,13 @@ define float @float_fadd_acquire(ptr %p) nounwind {
 ; LA64D-NEXT:    bne $a3, $a2, .LBB0_5
 ; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64D-NEXT:    # in Loop: Header=BB0_3 Depth=2
-; LA64D-NEXT:    dbar 0
 ; LA64D-NEXT:    move $a4, $a1
 ; LA64D-NEXT:    sc.w $a4, $a0, 0
 ; LA64D-NEXT:    beqz $a4, .LBB0_3
 ; LA64D-NEXT:    b .LBB0_6
 ; LA64D-NEXT:  .LBB0_5: # %atomicrmw.start
 ; LA64D-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; LA64D-NEXT:    dbar 1792
+; LA64D-NEXT:    dbar 0
 ; LA64D-NEXT:  .LBB0_6: # %atomicrmw.start
 ; LA64D-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; LA64D-NEXT:    movgr2fr.w $fa0, $a3
@@ -101,14 +99,13 @@ define float @float_fsub_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    bne $a3, $a2, .LBB1_5
 ; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64F-NEXT:    # in Loop: Header=BB1_3 Depth=2
-; LA64F-NEXT:    dbar 0
 ; LA64F-NEXT:    move $a4, $a1
 ; LA64F-NEXT:    sc.w $a4, $a0, 0
 ; LA64F-NEXT:    beqz $a4, .LBB1_3
 ; LA64F-NEXT:    b .LBB1_6
 ; LA64F-NEXT:  .LBB1_5: # %atomicrmw.start
 ; LA64F-NEXT:    # in Loop: Header=BB1_1 Depth=1
-; LA64F-NEXT:    dbar 1792
+; LA64F-NEXT:    dbar 0
 ; LA64F-NEXT:  .LBB1_6: # %atomicrmw.start
 ; LA64F-NEXT:    # in Loop: Header=BB1_1 Depth=1
 ; LA64F-NEXT:    movgr2fr.w $fa0, $a3
@@ -137,14 +134,13 @@ define float @float_fsub_acquire(ptr %p) nounwind {
 ; LA64D-NEXT:    bne $a3, $a2, .LBB1_5
 ; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64D-NEXT:    # in Loop: Header=BB1_3 Depth=2
-; LA64D-NEXT:    dbar 0
 ; LA64D-NEXT:    move $a4, $a1
 ; LA64D-NEXT:    sc.w $a4, $a0, 0
 ; LA64D-NEXT:    beqz $a4, .LBB1_3
 ; LA64D-NEXT:    b .LBB1_6
 ; LA64D-NEXT:  .LBB1_5: # %atomicrmw.start
 ; LA64D-NEXT:    # in Loop: Header=BB1_1 Depth=1
-; LA64D-NEXT:    dbar 1792
+; LA64D-NEXT:    dbar 0
 ; LA64D-NEXT:  .LBB1_6: # %atomicrmw.start
 ; LA64D-NEXT:    # in Loop: Header=BB1_1 Depth=1
 ; LA64D-NEXT:    movgr2fr.w $fa0, $a3
@@ -178,14 +174,13 @@ define float @float_fmin_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    bne $a3, $a2, .LBB2_5
 ; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64F-NEXT:    # in Loop: Header=BB2_3 Depth=2
-; LA64F-NEXT:    dbar 0
 ; LA64F-NEXT:    move $a4, $a1
 ; LA64F-NEXT:    sc.w $a4, $a0, 0
 ; LA64F-NEXT:    beqz $a4, .LBB2_3
 ; LA64F-NEXT:    b .LBB2_6
 ; LA64F-NEXT:  .LBB2_5: # %atomicrmw.start
 ; LA64F-NEXT:    # in Loop: Header=BB2_1 Depth=1
-; LA64F-NEXT:    dbar 1792
+; LA64F-NEXT:    dbar 0
 ; LA64F-NEXT:  .LBB2_6: # %atomicrmw.start
 ; LA64F-NEXT:    # in Loop: Header=BB2_1 Depth=1
 ; LA64F-NEXT:    movgr2fr.w $fa0, $a3
@@ -215,14 +210,13 @@ define float @float_fmin_acquire(ptr %p) nounwind {
 ; LA64D-NEXT:    bne $a3, $a2, .LBB2_5
 ; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64D-NEXT:    # in Loop: Header=BB2_3 Depth=2
-; LA64D-NEXT:    dbar 0
 ; LA64D-NEXT:    move $a4, $a1
 ; LA64D-NEXT:    sc.w $a4, $a0, 0
 ; LA64D-NEXT:    beqz $a4, .LBB2_3
 ; LA64D-NEXT:    b .LBB2_6
 ; LA64D-NEXT:  .LBB2_5: # %atomicrmw.start
 ; LA64D-NEXT:    # in Loop: Header=BB2_1 Depth=1
-; LA64D-NEXT:    dbar 1792
+; LA64D-NEXT:    dbar 0
 ; LA64D-NEXT:  .LBB2_6: # %atomicrmw.start
 ; LA64D-NEXT:    # in Loop: Header=BB2_1 Depth=1
 ; LA64D-NEXT:    movgr2fr.w $fa0, $a3
@@ -256,14 +250,13 @@ define float @float_fmax_acquire(ptr %p) nounwind {
 ; LA64F-NEXT:    bne $a3, $a2, .LBB3_5
 ; LA64F-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64F-NEXT:    # in Loop: Header=BB3_3 Depth=2
-; LA64F-NEXT:    dbar 0
 ; LA64F-NEXT:    move $a4, $a1
 ; LA64F-NEXT:    sc.w $a4, $a0, 0
 ; LA64F-NEXT:    beqz $a4, .LBB3_3
 ; LA64F-NEXT:    b .LBB3_6
 ; LA64F-NEXT:  .LBB3_5: # %atomicrmw.start
 ; LA64F-NEXT:    # in Loop: Header=BB3_1 Depth=1
-; LA64F-NEXT:    dbar 1792
+; LA64F-NEXT:    dbar 0
 ; LA64F-NEXT:  .LBB3_6: # %atomicrmw.start
 ; LA64F-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; LA64F-NEXT:    movgr2fr.w $fa0, $a3
@@ -293,14 +286,13 @@ define float @float_fmax_acquire(ptr %p) nounwind {
 ; LA64D-NEXT:    bne $a3, $a2, .LBB3_5
 ; LA64D-NEXT:  # %bb.4: # %atomicrmw.start
 ; LA64D-NEXT:    # in Loop: Header=BB3_3 Depth=2
-; LA64D-NEXT:    dbar 0
 ; LA64D-NEXT:    move $a4, $a1
 ; LA64D-NEXT:    sc.w $a4, $a0, 0
 ; LA64D-NEXT:    beqz $a4, .LBB3_3
 ; LA64D-NEXT:    b .LBB3_6
 ; LA64D-NEXT:  .LBB3_5: # %atomicrmw.start
 ; LA64D-NEXT:    # in Loop: Header=BB3_1 Depth=1
-; LA64D-NEXT:    dbar 1792
+; LA64D-NEXT:    dbar 0
 ; LA64D-NEXT:  .LBB3_6: # %atomicrmw.start
 ; LA64D-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; LA64D-NEXT:    movgr2fr.w $fa0, $a3
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
index d1fb9ede9a9ae2..27b8cc17fd64f2 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
@@ -16,7 +16,6 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
 ; LA64-NEXT:    move $a5, $a4
@@ -29,8 +28,6 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB0_1
 ; LA64-NEXT:  # %bb.4:
-; LA64-NEXT:    dbar 1792
-; LA64-NEXT:  # %bb.5:
 ; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw umax ptr %a, i8 %b acquire
@@ -50,7 +47,6 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a2
 ; LA64-NEXT:    move $a5, $a4
@@ -63,8 +59,6 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB1_1
 ; LA64-NEXT:  # %bb.4:
-; LA64-NEXT:    dbar 1792
-; LA64-NEXT:  # %bb.5:
 ; LA64-NEXT:    srl.w $a0, $a4, $a3
 ; LA64-NEXT:    ret
   %1 = atomicrmw umax ptr %a, i16 %b acquire
@@ -103,7 +97,6 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a3
 ; LA64-NEXT:    move $a5, $a4
@@ -116,8 +109,6 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB4_1
 ; LA64-NEXT:  # %bb.4:
-; LA64-NEXT:    dbar 1792
-; LA64-NEXT:  # %bb.5:
 ; LA64-NEXT:    srl.w $a0, $a4, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw umin ptr %a, i8 %b acquire
@@ -137,7 +128,6 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a6, $a4, $a2
 ; LA64-NEXT:    move $a5, $a4
@@ -150,8 +140,6 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-NEXT:    sc.w $a5, $a0, 0
 ; LA64-NEXT:    beqz $a5, .LBB5_1
 ; LA64-NEXT:  # %bb.4:
-; LA64-NEXT:    dbar 1792
-; LA64-NEXT:  # %bb.5:
 ; LA64-NEXT:    srl.w $a0, $a4, $a3
 ; LA64-NEXT:    ret
   %1 = atomicrmw umin ptr %a, i16 %b acquire
@@ -192,7 +180,6 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-NEXT:    andi $a4, $a2, 24
 ; LA64-NEXT:    xori $a4, $a4, 56
 ; LA64-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a3
 ; LA64-NEXT:    move $a6, $a5
@@ -207,8 +194,6 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-NEXT:    sc.w $a6, $a0, 0
 ; LA64-NEXT:    beqz $a6, .LBB8_1
 ; LA64-NEXT:  # %bb.4:
-; LA64-NEXT:    dbar 1792
-; LA64-NEXT:  # %bb.5:
 ; LA64-NEXT:    srl.w $a0, $a5, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw max ptr %a, i8 %b acquire
@@ -231,7 +216,6 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
 ; LA64-NEXT:    move $a6, $a5
@@ -246,8 +230,6 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-NEXT:    sc.w $a6, $a0, 0
 ; LA64-NEXT:    beqz $a6, .LBB9_1
 ; LA64-NEXT:  # %bb.4:
-; LA64-NEXT:    dbar 1792
-; LA64-NEXT:  # %bb.5:
 ; LA64-NEXT:    srl.w $a0, $a5, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw max ptr %a, i16 %b acquire
@@ -288,7 +270,6 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-NEXT:    andi $a4, $a2, 24
 ; LA64-NEXT:    xori $a4, $a4, 56
 ; LA64-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a3
 ; LA64-NEXT:    move $a6, $a5
@@ -303,8 +284,6 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-NEXT:    sc.w $a6, $a0, 0
 ; LA64-NEXT:    beqz $a6, .LBB12_1
 ; LA64-NEXT:  # %bb.4:
-; LA64-NEXT:    dbar 1792
-; LA64-NEXT:  # %bb.5:
 ; LA64-NEXT:    srl.w $a0, $a5, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw min ptr %a, i8 %b acquire
@@ -327,7 +306,6 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    ll.w $a5, $a0, 0
 ; LA64-NEXT:    and $a7, $a5, $a4
 ; LA64-NEXT:    move $a6, $a5
@@ -342,8 +320,6 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-NEXT:    sc.w $a6, $a0, 0
 ; LA64-NEXT:    beqz $a6, .LBB13_1
 ; LA64-NEXT:  # %bb.4:
-; LA64-NEXT:    dbar 1792
-; LA64-NEXT:  # %bb.5:
 ; LA64-NEXT:    srl.w $a0, $a5, $a2
 ; LA64-NEXT:    ret
   %1 = atomicrmw min ptr %a, i16 %b acquire
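The atomicrmw checks that follow exercise the same LL/SC loops from C-level RMW operations. A small sketch with the GCC/Clang __atomic builtins of the kind of source that produces them (function names invented for illustration; on LA64 many plain 32/64-bit ops typically select AM* instructions rather than loops, which is presumably why most of the wide-type hunks below are LA32):

    #include <stdint.h>

    // atomicrmw xchg i8 ... acquire: masked ll.w/sc.w loop, now without the
    // leading "dbar 0".
    uint8_t xchg_u8_acquire(uint8_t *p, uint8_t v) {
      return __atomic_exchange_n(p, v, __ATOMIC_ACQUIRE);
    }

    // atomicrmw add i16 ... acquire: the same loop shape with an add.w inside.
    uint16_t add_u16_acquire(uint16_t *p, uint16_t v) {
      return __atomic_fetch_add(p, v, __ATOMIC_ACQUIRE);
    }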
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
index 8bab095fba93dc..86770c3a262575 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
@@ -12,7 +12,6 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
 ; LA32-NEXT:    xor $a5, $a4, $a5
@@ -35,7 +34,6 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
 ; LA64-NEXT:    xor $a5, $a4, $a5
@@ -59,7 +57,6 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind {
 ; LA32-NEXT:    nor $a2, $a2, $zero
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a0, 0
@@ -90,7 +87,6 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
 ; LA32-NEXT:    sll.w $a2, $a2, $a1
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a0, 0
@@ -123,7 +119,6 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-NEXT:    sll.w $a1, $a1, $a3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    addi.w $a5, $a1, 0
 ; LA32-NEXT:    xor $a5, $a4, $a5
@@ -147,7 +142,6 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    addi.w $a5, $a1, 0
 ; LA64-NEXT:    xor $a5, $a4, $a5
@@ -172,7 +166,6 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
 ; LA32-NEXT:    nor $a1, $a1, $zero
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a1
 ; LA32-NEXT:    sc.w $a4, $a0, 0
@@ -205,7 +198,6 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
 ; LA32-NEXT:    sc.w $a4, $a0, 0
@@ -232,7 +224,6 @@ define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xchg_i32_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a2, $a0, 0
 ; LA32-NEXT:    move $a3, $a1
 ; LA32-NEXT:    sc.w $a3, $a0, 0
@@ -280,7 +271,6 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
 ; LA32-NEXT:    xor $a5, $a4, $a5
@@ -303,7 +293,6 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
 ; LA64-NEXT:    xor $a5, $a4, $a5
@@ -329,7 +318,6 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-NEXT:    sll.w $a1, $a1, $a3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    add.w $a5, $a4, $a1
 ; LA32-NEXT:    xor $a5, $a4, $a5
@@ -353,7 +341,6 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    add.w $a5, $a4, $a1
 ; LA64-NEXT:    xor $a5, $a4, $a5
@@ -372,7 +359,6 @@ define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind {
 ; LA32-LABEL: atomicrmw_add_i32_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:  .LBB10_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a2, $a0, 0
 ; LA32-NEXT:    add.w $a3, $a2, $a1
 ; LA32-NEXT:    sc.w $a3, $a0, 0
@@ -420,7 +406,6 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
 ; LA32-NEXT:    xor $a5, $a4, $a5
@@ -443,7 +428,6 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
 ; LA64-NEXT:    xor $a5, $a4, $a5
@@ -469,7 +453,6 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-NEXT:    sll.w $a1, $a1, $a3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    sub.w $a5, $a4, $a1
 ; LA32-NEXT:    xor $a5, $a4, $a5
@@ -493,7 +476,6 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    sub.w $a5, $a4, $a1
 ; LA64-NEXT:    xor $a5, $a4, $a5
@@ -512,7 +494,6 @@ define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind {
 ; LA32-LABEL: atomicrmw_sub_i32_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:  .LBB14_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a2, $a0, 0
 ; LA32-NEXT:    sub.w $a3, $a2, $a1
 ; LA32-NEXT:    sc.w $a3, $a0, 0
@@ -562,7 +543,6 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
 ; LA32-NEXT:    nor $a5, $a5, $zero
@@ -586,7 +566,6 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
 ; LA64-NEXT:    nor $a5, $a5, $zero
@@ -613,7 +592,6 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-NEXT:    sll.w $a1, $a1, $a3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a4, $a0, 0
 ; LA32-NEXT:    and $a5, $a4, $a1
 ; LA32-NEXT:    nor $a5, $a5, $zero
@@ -638,7 +616,6 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-NEXT:    addi.w $a1, $a1, 0
 ; LA64-NEXT:    bstrins.d $a0, $zero, 1, 0
 ; LA64-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    ll.w $a4, $a0, 0
 ; LA64-NEXT:    and $a5, $a4, $a1
 ; LA64-NEXT:    nor $a5, $a5, $zero
@@ -658,7 +635,6 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind {
 ; LA32-LABEL: atomicrmw_nand_i32_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:  .LBB18_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a2, $a0, 0
 ; LA32-NEXT:    and $a3, $a2, $a1
 ; LA32-NEXT:    nor $a3, $a3, $zero
@@ -671,7 +647,6 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind {
 ; LA64-LABEL: atomicrmw_nand_i32_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:  .LBB18_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    ll.w $a2, $a0, 0
 ; LA64-NEXT:    and $a3, $a2, $a1
 ; LA64-NEXT:    nor $a3, $a3, $zero
@@ -698,7 +673,6 @@ define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind {
 ; LA64-LABEL: atomicrmw_nand_i64_acquire:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:  .LBB19_1: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    dbar 0
 ; LA64-NEXT:    ll.d $a2, $a0, 0
 ; LA64-NEXT:    and $a3, $a2, $a1
 ; LA64-NEXT:    nor $a3, $a3, $zero
@@ -722,7 +696,6 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-NEXT:    orn $a1, $a1, $a3
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB20_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    and $a4, $a3, $a1
 ; LA32-NEXT:    sc.w $a4, $a0, 0
@@ -759,7 +732,6 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-NEXT:    orn $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a2, $a0, 0
 ; LA32-NEXT:    and $a4, $a2, $a1
 ; LA32-NEXT:    sc.w $a4, $a0, 0
@@ -789,7 +761,6 @@ define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind {
 ; LA32-LABEL: atomicrmw_and_i32_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a2, $a0, 0
 ; LA32-NEXT:    and $a3, $a2, $a1
 ; LA32-NEXT:    sc.w $a3, $a0, 0
@@ -835,7 +806,6 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
 ; LA32-NEXT:    sc.w $a4, $a0, 0
@@ -865,7 +835,6 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB25_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    or $a4, $a3, $a1
 ; LA32-NEXT:    sc.w $a4, $a0, 0
@@ -891,7 +860,6 @@ define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind {
 ; LA32-LABEL: atomicrmw_or_i32_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:  .LBB26_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a2, $a0, 0
 ; LA32-NEXT:    or $a3, $a2, $a1
 ; LA32-NEXT:    sc.w $a3, $a0, 0
@@ -937,7 +905,6 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB28_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
 ; LA32-NEXT:    sc.w $a4, $a0, 0
@@ -967,7 +934,6 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-NEXT:    sll.w $a1, $a1, $a2
 ; LA32-NEXT:    bstrins.w $a0, $zero, 1, 0
 ; LA32-NEXT:  .LBB29_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a3, $a0, 0
 ; LA32-NEXT:    xor $a4, $a3, $a1
 ; LA32-NEXT:    sc.w $a4, $a0, 0
@@ -993,7 +959,6 @@ define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind {
 ; LA32-LABEL: atomicrmw_xor_i32_acquire:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:  .LBB30_1: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    dbar 0
 ; LA32-NEXT:    ll.w $a2, $a0, 0
 ; LA32-NEXT:    xor $a3, $a2, $a1
 ; LA32-NEXT:    sc.w $a3, $a0, 0
diff --git a/llvm/unittests/Target/LoongArch/InstSizes.cpp b/llvm/unittests/Target/LoongArch/InstSizes.cpp
index e027f256825b6a..f38af4a8d915f4 100644
--- a/llvm/unittests/Target/LoongArch/InstSizes.cpp
+++ b/llvm/unittests/Target/LoongArch/InstSizes.cpp
@@ -121,7 +121,7 @@ TEST(InstSizes, AtomicPseudo) {
       "    dead early-clobber renamable $r10, dead early-clobber renamable $r11 = PseudoAtomicLoadAdd32 renamable $r7, renamable $r6, renamable $r8\n"
      "    dead early-clobber renamable $r5, dead early-clobber renamable $r9, dead early-clobber renamable $r10 = PseudoMaskedAtomicLoadUMax32 renamable $r7, renamable $r6, renamable $r8, 4\n"
       "    early-clobber renamable $r9, dead early-clobber renamable $r10, dead early-clobber renamable $r11 = PseudoMaskedAtomicLoadMax32 killed renamable $r6, killed renamable $r5, killed renamable $r7, killed renamable $r8, 4\n"
-      "    dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoCmpXchg32 renamable $r7, renamable $r4, renamable $r6\n"
+      "    dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoCmpXchg32 renamable $r7, renamable $r4, renamable $r6, 4\n"
       "    dead early-clobber renamable $r5, dead early-clobber renamable $r9 = PseudoMaskedCmpXchg32 killed renamable $r7, killed renamable $r4, killed renamable $r6, killed renamable $r8, 4\n",
       // clang-format on
      [](LoongArchInstrInfo &II, MachineFunction &MF) {