diff --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index 8e7e2e5f73709..f1d487c87d6f3 100644
--- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -326,6 +326,15 @@ void SparcAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
 void SparcAsmPrinter::emitInstruction(const MachineInstr *MI) {
   Sparc_MC::verifyInstructionPredicates(MI->getOpcode(),
                                         getSubtargetInfo().getFeatureBits());
+  if (MI->isBundle()) {
+    const MachineBasicBlock *MBB = MI->getParent();
+    MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
+    while (I != MBB->instr_end() && I->isInsideBundle()) {
+      emitInstruction(&*I);
+      ++I;
+    }
+    return;
+  }

   switch (MI->getOpcode()) {
   default: break;
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index dd221327dbdc6..ad307d49297a9 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -33,6 +33,7 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
@@ -3562,3 +3563,28 @@ void SparcTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
   if (!Node->hasAnyUseOfValue(0))
     MI.getOperand(0).setReg(SP::G0);
 }
+
+Instruction *SparcTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
+                                                   Instruction *Inst,
+                                                   AtomicOrdering Ord) const {
+  bool HasStoreSemantics =
+      isa<AtomicRMWInst, AtomicCmpXchgInst, StoreInst>(Inst);
+  if (HasStoreSemantics && isReleaseOrStronger(Ord))
+    return Builder.CreateFence(AtomicOrdering::Release);
+  return nullptr;
+}
+
+Instruction *SparcTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
+                                                    Instruction *Inst,
+                                                    AtomicOrdering Ord) const {
+  // V8 loads already come with an implicit acquire barrier, so there's no
+  // need to emit it again.
+  bool HasLoadSemantics = isa<AtomicRMWInst, AtomicCmpXchgInst, LoadInst>(Inst);
+  if (Subtarget->isV9() && HasLoadSemantics && isAcquireOrStronger(Ord))
+    return Builder.CreateFence(AtomicOrdering::Acquire);
+
+  // SC plain stores would need a trailing full barrier.
+  if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
+    return Builder.CreateFence(Ord);
+  return nullptr;
+}
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h
index 4017beb88ff31..73bd8ff6b24a4 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -182,6 +182,11 @@ namespace llvm {
     bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                     EVT VT) const override;

+    Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
+                                  AtomicOrdering Ord) const override;
+    Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
+                                   AtomicOrdering Ord) const override;
+
     bool shouldInsertFencesForAtomic(const Instruction *I) const override {
       // FIXME: We insert fences for each atomics and generate
       // sub-optimal code for PSO/TSO. (Approximately nobody uses any
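The hooks above, together with the atomic_fence patterns in the SparcInstrInfo.td hunk further below, define the full ordering-to-barrier mapping. The standalone C++ sketch that follows restates that mapping for reference only; it is not LLVM code, and Ord, v9FenceMask, and fencesFor are names invented for this sketch. The enum values mirror the AtomicOrdering immediates matched by the (i32 4)/(i32 5)/(i32 6) patterns, and the mask bits are the membar mmask encodings used by MEMBARi (#LoadLoad = 0x1, #StoreLoad = 0x2, #LoadStore = 0x4, #StoreStore = 0x8). The V8 fallback (nop / stbar / stbar; ldstub) is summarized in comments only.

// Reference sketch only -- not part of the patch and not LLVM API.
#include <cstdint>
#include <cstdio>

// SPARC V9 membar mmask bits.
enum : uint8_t { LoadLoad = 0x1, StoreLoad = 0x2, LoadStore = 0x4, StoreStore = 0x8 };

// Values mirror LLVM's AtomicOrdering immediates matched in the .td patterns.
enum class Ord { Monotonic = 2, Acquire = 4, Release = 5, AcqRel = 6, SeqCst = 7 };

// V9 lowering of a standalone IR fence, per the new atomic_fence patterns.
// (On V8 the same orderings map to nop / stbar / stbar+ldstub instead.)
uint8_t v9FenceMask(Ord O) {
  switch (O) {
  case Ord::Acquire:
    return LoadLoad | LoadStore;                          // membar 0x5
  case Ord::Release:
    return LoadStore | StoreStore;                        // membar 0xc
  case Ord::AcqRel:
    return LoadLoad | LoadStore | StoreStore;             // membar 0xd
  case Ord::SeqCst:
    return LoadLoad | StoreLoad | LoadStore | StoreStore; // membar 0xf
  default:
    return 0; // monotonic and weaker need no barrier
  }
}

// Fence placement around an atomic access, following emitLeadingFence /
// emitTrailingFence: release-or-stronger store-like ops get a leading
// "fence release"; acquire-or-stronger load-like ops get a trailing
// "fence acquire" on V9 only (V8 loads acquire implicitly); seq_cst plain
// stores get a trailing full "fence seq_cst".
void fencesFor(Ord O, bool LoadLike, bool StoreLike, bool IsV9) {
  bool RelOrStronger = O == Ord::Release || O == Ord::AcqRel || O == Ord::SeqCst;
  bool AcqOrStronger = O == Ord::Acquire || O == Ord::AcqRel || O == Ord::SeqCst;
  if (StoreLike && RelOrStronger)
    std::printf("  leading:  fence release\n");
  if (IsV9 && LoadLike && AcqOrStronger)
    std::printf("  trailing: fence acquire\n");
  if (StoreLike && !LoadLike && O == Ord::SeqCst)
    std::printf("  trailing: fence seq_cst\n");
}

int main() {
  std::printf("fence seq_cst on V9 -> membar mask 0x%x\n",
              (unsigned)v9FenceMask(Ord::SeqCst));
  std::printf("cmpxchg acq_rel on V9:\n");
  fencesFor(Ord::AcqRel, /*LoadLike=*/true, /*StoreLike=*/true, /*IsV9=*/true);
}

For example, for an acq_rel cmpxchg on V9 the sketch prints a leading fence release and a trailing fence acquire, which lower to the membar #LoadStore | #StoreStore and membar #LoadLoad | #LoadStore pair expected by the cas_acq_rel checks in the new atomics-ordering.ll test below.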
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index a7fbbd4044c11..52732de194b0f 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -653,6 +653,23 @@ bool SparcInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
         .addImm(Offset);
     return true;
   }
+  case SP::V8BAR: {
+    assert(!Subtarget.isV9() &&
+           "V8BAR should not be emitted on V9 processors!");
+
+    // Emit stbar; ldstub [%sp-1], %g0
+    // The sequence acts as a full barrier on V8 systems.
+    MachineBasicBlock &MBB = *MI.getParent();
+    MachineInstr &InstSTBAR =
+        *BuildMI(MBB, MI, MI.getDebugLoc(), get(SP::STBAR));
+    MachineInstr &InstLDSTUB =
+        *BuildMI(MBB, MI, MI.getDebugLoc(), get(SP::LDSTUBri), SP::G0)
+             .addReg(SP::O6)
+             .addImm(-1);
+    MIBundleBuilder(MBB, InstSTBAR, InstLDSTUB);
+    MBB.erase(MI);
+    return true;
+  }
   }
   return false;
 }
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 1a32eafb0e83d..0495fbef94473 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -561,6 +561,9 @@ class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
   let isPseudo = 1;
 }

+// Full memory barrier for V8.
+def V8BAR : Pseudo<(outs), (ins), "!V8BAR", []>, Requires<[HasNoV9]>;
+
 // GETPCX for PIC
 let Defs = [O7] in {
   def GETPCX : Pseudo<(outs getPCX:$getpcseq), (ins), "$getpcseq", [] >;
@@ -1957,12 +1960,30 @@ def : Pat<(i32 (zextloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
 def : Pat<(store (i32 0), ADDRrr:$dst), (STrr ADDRrr:$dst, (i32 G0))>;
 def : Pat<(store (i32 0), ADDRri:$dst), (STri ADDRri:$dst, (i32 G0))>;

-// store bar for all atomic_fence in V8.
-let Predicates = [HasNoV9] in
-  def : Pat<(atomic_fence timm, timm), (STBAR)>;
+// All load-type operations in V8 come with implicit acquire semantics.
+let Predicates = [HasNoV9] in {
+  // Acquire -> nop
+  def : Pat<(atomic_fence (i32 4), timm), (NOP)>;
+  // Release -> stbar
+  def : Pat<(atomic_fence (i32 5), timm), (STBAR)>;
+  // AcqRel and stronger -> stbar; ldstub [%sp-1], %g0
+  def : Pat<(atomic_fence timm, timm), (V8BAR)>;
+}

-let Predicates = [HasV9] in
+// We have to handle both 32 and 64-bit cases.
+let Predicates = [HasV9] in {
+  // Acquire -> membar #LoadLoad | #LoadStore
+  def : Pat<(atomic_fence (i32 4), timm), (MEMBARi 0x5)>;
+  def : Pat<(atomic_fence (i64 4), timm), (MEMBARi 0x5)>;
+  // Release -> membar #LoadStore | #StoreStore
+  def : Pat<(atomic_fence (i32 5), timm), (MEMBARi 0xc)>;
+  def : Pat<(atomic_fence (i64 5), timm), (MEMBARi 0xc)>;
+  // AcqRel -> membar #LoadLoad | #LoadStore | #StoreStore
+  def : Pat<(atomic_fence (i32 6), timm), (MEMBARi 0xd)>;
+  def : Pat<(atomic_fence (i64 6), timm), (MEMBARi 0xd)>;
+  // SeqCst -> membar #StoreLoad | #LoadLoad | #LoadStore | #StoreStore
   def : Pat<(atomic_fence timm, timm), (MEMBARi 0xf)>;
+}

 // atomic_load addr -> load addr
 def : Pat<(i32 (atomic_load_azext_8 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
diff --git a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
index 380a4a0a6b870..d1f1c46d9b8b1 100644
--- a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
@@ -5,7 +5,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
 ; CHECK-LABEL: atomicrmw_uinc_wrap_i8:
 ; CHECK: .cfi_startproc
 ; CHECK-NEXT: ! 
%bb.0: -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadStore | #StoreStore ; CHECK-NEXT: and %o0, -4, %o2 ; CHECK-NEXT: mov 3, %o3 ; CHECK-NEXT: andn %o3, %o0, %o0 @@ -36,7 +36,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end ; CHECK-NEXT: srl %o4, %o0, %o0 -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadLoad | #LoadStore ; CHECK-NEXT: retl ; CHECK-NEXT: nop %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst @@ -47,7 +47,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; CHECK-LABEL: atomicrmw_uinc_wrap_i16: ; CHECK: .cfi_startproc ; CHECK-NEXT: ! %bb.0: -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadStore | #StoreStore ; CHECK-NEXT: and %o0, -4, %o2 ; CHECK-NEXT: and %o0, 3, %o0 ; CHECK-NEXT: xor %o0, 2, %o0 @@ -79,7 +79,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end ; CHECK-NEXT: srl %o5, %o0, %o0 -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadLoad | #LoadStore ; CHECK-NEXT: retl ; CHECK-NEXT: nop %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst @@ -90,7 +90,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { ; CHECK-LABEL: atomicrmw_uinc_wrap_i32: ; CHECK: .cfi_startproc ; CHECK-NEXT: ! %bb.0: -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadStore | #StoreStore ; CHECK-NEXT: ld [%o0], %o2 ; CHECK-NEXT: .LBB2_1: ! %atomicrmw.start ; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1 @@ -106,7 +106,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { ; CHECK-NEXT: bne %icc, .LBB2_1 ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadLoad | #LoadStore ; CHECK-NEXT: retl ; CHECK-NEXT: mov %o2, %o0 %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst @@ -160,7 +160,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; CHECK-LABEL: atomicrmw_udec_wrap_i8: ; CHECK: .cfi_startproc ; CHECK-NEXT: ! %bb.0: -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadStore | #StoreStore ; CHECK-NEXT: and %o0, -4, %o2 ; CHECK-NEXT: mov 3, %o3 ; CHECK-NEXT: andn %o3, %o0, %o0 @@ -193,7 +193,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end ; CHECK-NEXT: srl %o5, %o0, %o0 -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadLoad | #LoadStore ; CHECK-NEXT: retl ; CHECK-NEXT: nop %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst @@ -204,7 +204,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; CHECK-LABEL: atomicrmw_udec_wrap_i16: ; CHECK: .cfi_startproc ; CHECK-NEXT: ! %bb.0: -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadStore | #StoreStore ; CHECK-NEXT: and %o0, -4, %o2 ; CHECK-NEXT: and %o0, 3, %o0 ; CHECK-NEXT: xor %o0, 2, %o0 @@ -238,7 +238,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.2: ! 
%atomicrmw.end ; CHECK-NEXT: srl %g2, %o0, %o0 -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadLoad | #LoadStore ; CHECK-NEXT: retl ; CHECK-NEXT: nop %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst @@ -249,7 +249,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { ; CHECK-LABEL: atomicrmw_udec_wrap_i32: ; CHECK: .cfi_startproc ; CHECK-NEXT: ! %bb.0: -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadStore | #StoreStore ; CHECK-NEXT: ld [%o0], %o2 ; CHECK-NEXT: .LBB6_1: ! %atomicrmw.start ; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1 @@ -267,7 +267,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { ; CHECK-NEXT: bne %icc, .LBB6_1 ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadLoad | #LoadStore ; CHECK-NEXT: retl ; CHECK-NEXT: mov %o2, %o0 %result = atomicrmw udec_wrap ptr %ptr, i32 %val seq_cst diff --git a/llvm/test/CodeGen/SPARC/atomics-ordering.ll b/llvm/test/CodeGen/SPARC/atomics-ordering.ll new file mode 100644 index 0000000000000..7c13ac2c5a221 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/atomics-ordering.ll @@ -0,0 +1,446 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=sparc -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32 +; RUN: llc < %s -mtriple=sparc -mcpu=leon4 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32-LEON4 +; RUN: llc < %s -mtriple=sparc -mcpu=v9 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32-V9 +; RUN: llc < %s -mtriple=sparcv9 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC64 + +define i32 @load_acq(ptr %0) nounwind { +; SPARC32-LABEL: load_acq: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_load_4 +; SPARC32-NEXT: mov 2, %o1 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore %g0, %o0, %o0 +; +; SPARC32-LEON4-LABEL: load_acq: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: ld [%o0], %o0 +; +; SPARC32-V9-LABEL: load_acq: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: ld [%o0], %o0 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: nop +; +; SPARC64-LABEL: load_acq: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: ld [%o0], %o0 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %2 = load atomic i32, ptr %0 acquire, align 4 + ret i32 %2 +} + +define i32 @load_sc(ptr %0) nounwind { +; SPARC32-LABEL: load_sc: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_load_4 +; SPARC32-NEXT: mov 5, %o1 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore %g0, %o0, %o0 +; +; SPARC32-LEON4-LABEL: load_sc: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: ld [%o0], %o0 +; +; SPARC32-V9-LABEL: load_sc: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: ld [%o0], %o0 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: nop +; +; SPARC64-LABEL: load_sc: +; SPARC64: ! 
%bb.0: +; SPARC64-NEXT: ld [%o0], %o0 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %2 = load atomic i32, ptr %0 seq_cst, align 4 + ret i32 %2 +} + +define void @store_rel(ptr %0, i32 %1) nounwind { +; SPARC32-LABEL: store_rel: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i1, %o1 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_store_4 +; SPARC32-NEXT: mov 3, %o2 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore +; +; SPARC32-LEON4-LABEL: store_rel: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: st %o1, [%o0] +; +; SPARC32-V9-LABEL: store_rel: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: st %o1, [%o0] +; +; SPARC64-LABEL: store_rel: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: st %o1, [%o0] + store atomic i32 %1, ptr %0 release, align 4 + ret void +} + +define void @store_sc(ptr %0, i32 %1) nounwind { +; SPARC32-LABEL: store_sc: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i1, %o1 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_store_4 +; SPARC32-NEXT: mov 5, %o2 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore +; +; SPARC32-LEON4-LABEL: store_sc: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: st %o1, [%o0] +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: ldstub [%sp+-1], %g0 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: nop +; +; SPARC32-V9-LABEL: store_sc: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: st %o1, [%o0] +; SPARC32-V9-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: nop +; +; SPARC64-LABEL: store_sc: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: st %o1, [%o0] +; SPARC64-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + store atomic i32 %1, ptr %0 seq_cst, align 4 + ret void +} + +define i32 @rmw_acq(ptr %0, i32 %1) nounwind { +; SPARC32-LABEL: rmw_acq: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i1, %o1 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_exchange_4 +; SPARC32-NEXT: mov 2, %o2 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore %g0, %o0, %o0 +; +; SPARC32-LEON4-LABEL: rmw_acq: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: swap [%o0], %o1 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o1, %o0 +; +; SPARC32-V9-LABEL: rmw_acq: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: swap [%o0], %o1 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o1, %o0 +; +; SPARC64-LABEL: rmw_acq: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: swap [%o0], %o1 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o1, %o0 + %3 = atomicrmw xchg ptr %0, i32 %1 acquire, align 4 + ret i32 %3 +} + +define i32 @rmw_rel(ptr %0, i32 %1) nounwind { +; SPARC32-LABEL: rmw_rel: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i1, %o1 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_exchange_4 +; SPARC32-NEXT: mov 3, %o2 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore %g0, %o0, %o0 +; +; SPARC32-LEON4-LABEL: rmw_rel: +; SPARC32-LEON4: ! 
%bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: swap [%o0], %o1 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o1, %o0 +; +; SPARC32-V9-LABEL: rmw_rel: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: swap [%o0], %o1 +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o1, %o0 +; +; SPARC64-LABEL: rmw_rel: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: swap [%o0], %o1 +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o1, %o0 + %3 = atomicrmw xchg ptr %0, i32 %1 release, align 4 + ret i32 %3 +} + +define i32 @rmw_acq_rel(ptr %0, i32 %1) nounwind { +; SPARC32-LABEL: rmw_acq_rel: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i1, %o1 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_exchange_4 +; SPARC32-NEXT: mov 4, %o2 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore %g0, %o0, %o0 +; +; SPARC32-LEON4-LABEL: rmw_acq_rel: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: swap [%o0], %o1 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o1, %o0 +; +; SPARC32-V9-LABEL: rmw_acq_rel: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: swap [%o0], %o1 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o1, %o0 +; +; SPARC64-LABEL: rmw_acq_rel: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: swap [%o0], %o1 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o1, %o0 + %3 = atomicrmw xchg ptr %0, i32 %1 acq_rel, align 4 + ret i32 %3 +} + +define i32 @rmw_sc(ptr %0, i32 %1) nounwind { +; SPARC32-LABEL: rmw_sc: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i1, %o1 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_exchange_4 +; SPARC32-NEXT: mov 5, %o2 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore %g0, %o0, %o0 +; +; SPARC32-LEON4-LABEL: rmw_sc: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: swap [%o0], %o1 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o1, %o0 +; +; SPARC32-V9-LABEL: rmw_sc: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: swap [%o0], %o1 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o1, %o0 +; +; SPARC64-LABEL: rmw_sc: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: swap [%o0], %o1 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o1, %o0 + %3 = atomicrmw xchg ptr %0, i32 %1 seq_cst, align 4 + ret i32 %3 +} + +define i32 @cas_acq(ptr %0, i32 %1, i32 %2) nounwind { +; SPARC32-LABEL: cas_acq: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i2, %o2 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: st %i1, [%fp+-4] +; SPARC32-NEXT: add %fp, -4, %o1 +; SPARC32-NEXT: mov 2, %o3 +; SPARC32-NEXT: call __atomic_compare_exchange_4 +; SPARC32-NEXT: mov %o3, %o4 +; SPARC32-NEXT: ld [%fp+-4], %i0 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore +; +; SPARC32-LEON4-LABEL: cas_acq: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o2, %o0 +; +; SPARC32-V9-LABEL: cas_acq: +; SPARC32-V9: ! 
%bb.0: +; SPARC32-V9-NEXT: cas [%o0], %o1, %o2 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o2, %o0 +; +; SPARC64-LABEL: cas_acq: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: cas [%o0], %o1, %o2 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o2, %o0 + %4 = cmpxchg ptr %0, i32 %1, i32 %2 acquire acquire, align 4 + %5 = extractvalue { i32, i1 } %4, 0 + ret i32 %5 +} + +define i32 @cas_rel(ptr %0, i32 %1, i32 %2) nounwind { +; SPARC32-LABEL: cas_rel: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i2, %o2 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: st %i1, [%fp+-4] +; SPARC32-NEXT: add %fp, -4, %o1 +; SPARC32-NEXT: mov 3, %o3 +; SPARC32-NEXT: call __atomic_compare_exchange_4 +; SPARC32-NEXT: mov %g0, %o4 +; SPARC32-NEXT: ld [%fp+-4], %i0 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore +; +; SPARC32-LEON4-LABEL: cas_rel: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o2, %o0 +; +; SPARC32-V9-LABEL: cas_rel: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: cas [%o0], %o1, %o2 +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o2, %o0 +; +; SPARC64-LABEL: cas_rel: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: cas [%o0], %o1, %o2 +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o2, %o0 + %4 = cmpxchg ptr %0, i32 %1, i32 %2 release monotonic, align 4 + %5 = extractvalue { i32, i1 } %4, 0 + ret i32 %5 +} + +define i32 @cas_acq_rel(ptr %0, i32 %1, i32 %2) nounwind { +; SPARC32-LABEL: cas_acq_rel: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i2, %o2 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: st %i1, [%fp+-4] +; SPARC32-NEXT: add %fp, -4, %o1 +; SPARC32-NEXT: mov 4, %o3 +; SPARC32-NEXT: call __atomic_compare_exchange_4 +; SPARC32-NEXT: mov 2, %o4 +; SPARC32-NEXT: ld [%fp+-4], %i0 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore +; +; SPARC32-LEON4-LABEL: cas_acq_rel: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o2, %o0 +; +; SPARC32-V9-LABEL: cas_acq_rel: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: cas [%o0], %o1, %o2 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o2, %o0 +; +; SPARC64-LABEL: cas_acq_rel: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: cas [%o0], %o1, %o2 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o2, %o0 + %4 = cmpxchg ptr %0, i32 %1, i32 %2 acq_rel acquire, align 4 + %5 = extractvalue { i32, i1 } %4, 0 + ret i32 %5 +} + +define i32 @cas_sc(ptr %0, i32 %1, i32 %2) nounwind { +; SPARC32-LABEL: cas_sc: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i2, %o2 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: st %i1, [%fp+-4] +; SPARC32-NEXT: add %fp, -4, %o1 +; SPARC32-NEXT: mov 5, %o3 +; SPARC32-NEXT: call __atomic_compare_exchange_4 +; SPARC32-NEXT: mov %o3, %o4 +; SPARC32-NEXT: ld [%fp+-4], %i0 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore +; +; SPARC32-LEON4-LABEL: cas_sc: +; SPARC32-LEON4: ! 
%bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o2, %o0 +; +; SPARC32-V9-LABEL: cas_sc: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: cas [%o0], %o1, %o2 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o2, %o0 +; +; SPARC64-LABEL: cas_sc: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: cas [%o0], %o1, %o2 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o2, %o0 + %4 = cmpxchg ptr %0, i32 %1, i32 %2 seq_cst seq_cst, align 4 + %5 = extractvalue { i32, i1 } %4, 0 + ret i32 %5 +} diff --git a/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll b/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll index 3a306a4d98613..ccef61dc1f3fc 100644 --- a/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll +++ b/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll @@ -12,7 +12,7 @@ target triple = "sparcv9-unknown-unknown" define i8 @test_cmpxchg_i8(ptr %arg, i8 %old, i8 %new) { ; CHECK-LABEL: @test_cmpxchg_i8( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -45,7 +45,7 @@ define i8 @test_cmpxchg_i8(ptr %arg, i8 %old, i8 %new) { ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 ; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i8, i1 } poison, i8 [[EXTRACTED]], 0 ; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i8, i1 } [[TMP17]], i1 [[TMP14]], 1 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: [[RET:%.*]] = extractvalue { i8, i1 } [[TMP18]], 0 ; CHECK-NEXT: ret i8 [[RET]] ; @@ -58,7 +58,7 @@ entry: define i16 @test_cmpxchg_i16(ptr %arg, i16 %old, i16 %new) { ; CHECK-LABEL: @test_cmpxchg_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -91,7 +91,7 @@ define i16 @test_cmpxchg_i16(ptr %arg, i16 %old, i16 %new) { ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 ; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0 ; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i16, i1 } [[TMP17]], i1 [[TMP14]], 1 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: [[RET:%.*]] = extractvalue { i16, i1 } [[TMP18]], 0 ; CHECK-NEXT: ret i16 [[RET]] ; @@ -104,7 +104,7 @@ entry: define i16 @test_add_i16(ptr %arg, i16 %val) { ; CHECK-LABEL: @test_add_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -130,7 +130,7 @@ define i16 @test_add_i16(ptr %arg, i16 %val) { ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: ret i16 [[EXTRACTED]] ; entry: @@ -141,7 +141,7 @@ entry: define i16 @test_xor_i16(ptr %arg, i16 %val) { 
; CHECK-LABEL: @test_xor_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -164,7 +164,7 @@ define i16 @test_xor_i16(ptr %arg, i16 %val) { ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: ret i16 [[EXTRACTED]] ; entry: @@ -175,7 +175,7 @@ entry: define i16 @test_or_i16(ptr %arg, i16 %val) { ; CHECK-LABEL: @test_or_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -198,7 +198,7 @@ define i16 @test_or_i16(ptr %arg, i16 %val) { ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: ret i16 [[EXTRACTED]] ; entry: @@ -209,7 +209,7 @@ entry: define i16 @test_and_i16(ptr %arg, i16 %val) { ; CHECK-LABEL: @test_and_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -233,7 +233,7 @@ define i16 @test_and_i16(ptr %arg, i16 %val) { ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: ret i16 [[EXTRACTED]] ; entry: @@ -244,7 +244,7 @@ entry: define i16 @test_min_i16(ptr %arg, i16 %val) { ; CHECK-LABEL: @test_min_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -272,7 +272,7 @@ define i16 @test_min_i16(ptr %arg, i16 %val) { ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: ret i16 [[EXTRACTED3]] ; entry: @@ -282,7 +282,7 @@ entry: define half @test_atomicrmw_fadd_f16(ptr %ptr, half %value) { ; CHECK-LABEL: @test_atomicrmw_fadd_f16( -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4) ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 @@ -312,7 +312,7 @@ define half @test_atomicrmw_fadd_f16(ptr %ptr, half %value) { ; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[EXTRACTED3]] to half -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: ret half [[TMP8]] ; %res = atomicrmw fadd ptr %ptr, 
half %value seq_cst