397 changes: 397 additions & 0 deletions llvm/lib/Target/ARM/ARMAtomicExpandPass.cpp
@@ -0,0 +1,397 @@
//===-- ARMAtomicExpandPass.cpp - Expand atomic instructions --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// appropriate (intrinsic-based) ldrex/strex loops.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-atomic-expand"
#include "ARM.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

namespace {
class ARMAtomicExpandPass : public FunctionPass {
const TargetLowering *TLI;
public:
static char ID; // Pass identification, replacement for typeid
explicit ARMAtomicExpandPass(const TargetMachine *TM = 0)
: FunctionPass(ID), TLI(TM ? TM->getTargetLowering() : 0) {}

bool runOnFunction(Function &F) override;
bool expandAtomicInsts(Function &F);

bool expandAtomicLoad(LoadInst *LI);
bool expandAtomicStore(StoreInst *SI);
bool expandAtomicRMW(AtomicRMWInst *AI);
bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);

AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord);

/// Perform a load-linked operation on Addr, returning a "Value *" with the
/// corresponding pointee type. This may entail some non-trivial operations
/// to truncate or reconstruct illegal types since intrinsics must be legal.
Value *loadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord);

/// Perform a store-conditional operation to Addr. Return the status of the
/// store: 0 if it succeeded, non-zero otherwise.
Value *storeConditional(IRBuilder<> &Builder, Value *Val, Value *Addr,
AtomicOrdering Ord);

/// Return true if the given (atomic) instruction should be expanded by this
/// pass.
bool shouldExpandAtomic(Instruction *Inst);
};
}

char ARMAtomicExpandPass::ID = 0;

FunctionPass *llvm::createARMAtomicExpandPass(const TargetMachine *TM) {
return new ARMAtomicExpandPass(TM);
}
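
// Note: as the ARMTargetMachine.cpp hunk below shows, this pass is added from
// ARMPassConfig::addPreISel(), and only for subtargets that have a data
// barrier and are not Thumb1-only.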

bool ARMAtomicExpandPass::runOnFunction(Function &F) {
SmallVector<Instruction *, 1> AtomicInsts;

// Changing control-flow while iterating through it is a bad idea, so gather a
// list of all atomic instructions before we start.
for (BasicBlock &BB : F)
for (Instruction &Inst : BB) {
if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) ||
(isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) ||
(isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
AtomicInsts.push_back(&Inst);
}

bool MadeChange = false;
for (Instruction *Inst : AtomicInsts) {
if (!shouldExpandAtomic(Inst))
continue;

if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
MadeChange |= expandAtomicRMW(AI);
else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst))
MadeChange |= expandAtomicCmpXchg(CI);
else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
MadeChange |= expandAtomicLoad(LI);
else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
MadeChange |= expandAtomicStore(SI);
else
llvm_unreachable("Unknown atomic instruction");
}

return MadeChange;
}

bool ARMAtomicExpandPass::expandAtomicLoad(LoadInst *LI) {
// Load instructions don't actually need a leading fence, even in the
// SequentiallyConsistent case.
AtomicOrdering MemOpOrder =
TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();

// The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is
// an ldrexd (A3.5.3).
IRBuilder<> Builder(LI);
Value *Val = loadLinked(Builder, LI->getPointerOperand(), MemOpOrder);

insertTrailingFence(Builder, LI->getOrdering());

LI->replaceAllUsesWith(Val);
LI->eraseFromParent();

return true;
}
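
// For example (an illustrative sketch, not verbatim pass output), on a
// fence-inserting target
//   %val = load atomic i64* %ptr seq_cst, align 8
// becomes a monotonic call to the ldrexd intrinsic (see loadLinked below)
// followed by "fence seq_cst".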

bool ARMAtomicExpandPass::expandAtomicStore(StoreInst *SI) {
// The only atomic 64-bit store on ARM is an strexd that succeeds, which means
// we need a loop and the entire instruction is essentially an "atomicrmw
// xchg" that ignores the value loaded.
IRBuilder<> Builder(SI);
AtomicRMWInst *AI =
Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
SI->getValueOperand(), SI->getOrdering());
SI->eraseFromParent();

// Now we have an appropriate swap instruction, lower it as usual.
return expandAtomicRMW(AI);
}
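
// For example (an illustrative sketch):
//   store atomic i64 %val, i64* %ptr seq_cst, align 8
// is first rewritten to
//   atomicrmw xchg i64* %ptr, i64 %val seq_cst  ; result unused
// which expandAtomicRMW then turns into an ldrexd/strexd loop.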

bool ARMAtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
AtomicOrdering Order = AI->getOrdering();
Value *Addr = AI->getPointerOperand();
BasicBlock *BB = AI->getParent();
Function *F = BB->getParent();
LLVMContext &Ctx = F->getContext();

// Given: atomicrmw some_op iN* %addr, iN %incr ordering
//
// The standard expansion we produce is:
// [...]
// fence?
// atomicrmw.start:
// %loaded = @load.linked(%addr)
// %new = some_op iN %loaded, %incr
// %stored = @store_conditional(%new, %addr)
// %try_again = icmp ne i32 %stored, 0
// br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end
// atomicrmw.end:
// fence?
// [...]
BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

// This grabs the DebugLoc from AI.
IRBuilder<> Builder(AI);

// The split call above "helpfully" added a branch at the end of BB (to the
// wrong place), but we might want a fence too. It's easiest to just remove
// the branch entirely.
std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
Builder.CreateBr(LoopBB);

// Start the main loop block now that we've taken care of the preliminaries.
Builder.SetInsertPoint(LoopBB);
Value *Loaded = loadLinked(Builder, Addr, MemOpOrder);

Value *NewVal;
switch (AI->getOperation()) {
case AtomicRMWInst::Xchg:
NewVal = AI->getValOperand();
break;
case AtomicRMWInst::Add:
NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
break;
case AtomicRMWInst::Sub:
NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new");
break;
case AtomicRMWInst::And:
NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
break;
case AtomicRMWInst::Nand:
NewVal = Builder.CreateAnd(Loaded, Builder.CreateNot(AI->getValOperand()),
"new");
break;
case AtomicRMWInst::Or:
NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new");
break;
case AtomicRMWInst::Xor:
NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new");
break;
case AtomicRMWInst::Max:
NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand());
NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
break;
case AtomicRMWInst::Min:
NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand());
NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
break;
case AtomicRMWInst::UMax:
NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand());
NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
break;
case AtomicRMWInst::UMin:
NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand());
NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
break;
default:
llvm_unreachable("Unknown atomic op");
}

Value *StoreSuccess = storeConditional(Builder, NewVal, Addr, MemOpOrder);
Value *TryAgain = Builder.CreateICmpNE(
StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

Builder.SetInsertPoint(ExitBB, ExitBB->begin());
insertTrailingFence(Builder, Order);

AI->replaceAllUsesWith(Loaded);
AI->eraseFromParent();

return true;
}

bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
AtomicOrdering Order = CI->getSuccessOrdering();
Value *Addr = CI->getPointerOperand();
BasicBlock *BB = CI->getParent();
Function *F = BB->getParent();
LLVMContext &Ctx = F->getContext();

// Given: cmpxchg iN* %addr, iN %desired, iN %new success_ord fail_ord
//
// The standard expansion we produce is:
// [...]
// fence?
// cmpxchg.start:
// %loaded = @load.linked(%addr)
// %should_store = icmp eq %loaded, %desired
// br i1 %should_store, label %cmpxchg.trystore, label %cmpxchg.end
// cmpxchg.trystore:
// %stored = @store_conditional(%new, %addr)
// %try_again = icmp ne i32 %stored, 0
// br i1 %try_again, label %cmpxchg.start, label %cmpxchg.end
// cmpxchg.end:
// fence?
// [...]
BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
BasicBlock *TryStoreBB =
BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ExitBB);
BasicBlock *LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);

// This grabs the DebugLoc from CI
IRBuilder<> Builder(CI);

// The split call above "helpfully" added a branch at the end of BB (to the
// wrong place), but we might want a fence too. It's easiest to just remove
// the branch entirely.
std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
Builder.CreateBr(LoopBB);

// Start the main loop block now that we've taken care of the preliminaries.
Builder.SetInsertPoint(LoopBB);
Value *Loaded = loadLinked(Builder, Addr, MemOpOrder);
Value *ShouldStore =
Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
Builder.CreateCondBr(ShouldStore, TryStoreBB, ExitBB);

Builder.SetInsertPoint(TryStoreBB);
Value *StoreSuccess =
storeConditional(Builder, CI->getNewValOperand(), Addr, MemOpOrder);
Value *TryAgain = Builder.CreateICmpNE(
StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "tryagain");
Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

// Finally, make sure later instructions don't get reordered with a fence if
// necessary.
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
insertTrailingFence(Builder, Order);

CI->replaceAllUsesWith(Loaded);
CI->eraseFromParent();

return true;
}

Value *ARMAtomicExpandPass::loadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
bool IsAcquire =
Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;

// Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
// intrinsic must return {i32, i32} and we have to recombine them into a
// single i64 here.
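// The emitted sequence is roughly (an illustrative sketch):
//   %lohi = call { i32, i32 } @llvm.arm.ldrexd(i8* %addr)
//   %lo = extractvalue { i32, i32 } %lohi, 0
//   %hi = extractvalue { i32, i32 } %lohi, 1
//   %lo64 = zext i32 %lo to i64
//   %hi64 = zext i32 %hi to i64
//   %hi.shl = shl i64 %hi64, 32
//   %val64 = or i64 %lo64, %hi.shl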
if (ValTy->getPrimitiveSizeInBits() == 64) {
Intrinsic::ID Int =
IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int);

Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");

Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
return Builder.CreateOr(
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
}

Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys);

return Builder.CreateTruncOrBitCast(Builder.CreateCall(Ldrex, Addr), ValTy);
}

Value *ARMAtomicExpandPass::storeConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr, AtomicOrdering Ord) {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
bool IsRelease =
Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;

// Since the intrinsics must have legal type, the i64 intrinsics take two
// parameters: "i32, i32". We must marshal Val into the appropriate form
// before the call.
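// The emitted sequence is roughly (an illustrative sketch):
//   %lo = trunc i64 %val to i32
//   %shr = lshr i64 %val, 32
//   %hi = trunc i64 %shr to i32
//   %status = call i32 @llvm.arm.strexd(i32 %lo, i32 %hi, i8* %addr)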
if (Val->getType()->getPrimitiveSizeInBits() == 64) {
Intrinsic::ID Int =
IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
Function *Strex = Intrinsic::getDeclaration(M, Int);
Type *Int32Ty = Type::getInt32Ty(M->getContext());

Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
return Builder.CreateCall3(Strex, Lo, Hi, Addr);
}

Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
Type *Tys[] = { Addr->getType() };
Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);

return Builder.CreateCall2(
Strex, Builder.CreateZExtOrBitCast(
Val, Strex->getFunctionType()->getParamType(0)),
Addr);
}

AtomicOrdering ARMAtomicExpandPass::insertLeadingFence(IRBuilder<> &Builder,
AtomicOrdering Ord) {
if (!TLI->getInsertFencesForAtomic())
return Ord;

if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
Builder.CreateFence(Release);

// The exclusive operations don't need any barrier if we're adding separate
// fences.
return Monotonic;
}

void ARMAtomicExpandPass::insertTrailingFence(IRBuilder<> &Builder,
AtomicOrdering Ord) {
if (!TLI->getInsertFencesForAtomic())
return;

if (Ord == Acquire || Ord == AcquireRelease)
Builder.CreateFence(Acquire);
else if (Ord == SequentiallyConsistent)
Builder.CreateFence(SequentiallyConsistent);
}
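
// Taken together, a seq_cst operation on a fence-inserting (pre-v8) target is
// bracketed by "fence release" before the ldrex/strex loop and
// "fence seq_cst" after it, while the loop itself runs with monotonic
// ordering.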

bool ARMAtomicExpandPass::shouldExpandAtomic(Instruction *Inst) {
// Loads and stores less than 64 bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong:
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64;
else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
return LI->getType()->getPrimitiveSizeInBits() == 64;

// For the real atomic operations, we have ldrex/strex up to 64 bits.
return Inst->getType()->getPrimitiveSizeInBits() <= 64;
}
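
For reference, here is a hand-written sketch of the overall expansion this
pass produces for a seq_cst i32 "atomicrmw add" on a fence-inserting target.
Function, block, and value names are illustrative, and the IR is reconstructed
from the pass logic above rather than copied from its output:

define i32 @add_seq_cst(i32* %ptr, i32 %incr) {
entry:
  fence release
  br label %atomicrmw.start

atomicrmw.start:
  %loaded = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
  %new = add i32 %loaded, %incr
  %stored = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %ptr)
  %tryagain = icmp ne i32 %stored, 0
  br i1 %tryagain, label %atomicrmw.start, label %atomicrmw.end

atomicrmw.end:
  fence seq_cst
  ret i32 %loaded
}

declare i32 @llvm.arm.ldrex.p0i32(i32*)
declare i32 @llvm.arm.strex.p0i32(i32, i32*)
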
113 changes: 0 additions & 113 deletions llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -252,8 +252,6 @@ class ARMDAGToDAGISel : public SelectionDAGISel {

SDNode *SelectConcatVector(SDNode *N);

SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, unsigned Op64);

/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
@@ -2411,38 +2409,6 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
}

SDNode *ARMDAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
unsigned Op16,unsigned Op32,
unsigned Op64) {
// Mostly direct translation to the given operations, except that we preserve
// the AtomicOrdering for use later on.
AtomicSDNode *AN = cast<AtomicSDNode>(Node);
EVT VT = AN->getMemoryVT();

unsigned Op;
SDVTList VTs = CurDAG->getVTList(AN->getValueType(0), MVT::Other);
if (VT == MVT::i8)
Op = Op8;
else if (VT == MVT::i16)
Op = Op16;
else if (VT == MVT::i32)
Op = Op32;
else if (VT == MVT::i64) {
Op = Op64;
VTs = CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other);
} else
llvm_unreachable("Unexpected atomic operation");

SmallVector<SDValue, 6> Ops;
for (unsigned i = 1; i < AN->getNumOperands(); ++i)
Ops.push_back(AN->getOperand(i));

Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
Ops.push_back(AN->getOperand(0)); // Chain moves to the end

return CurDAG->SelectNodeTo(Node, Op, VTs, &Ops[0], Ops.size());
}

SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);

@@ -3320,85 +3286,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {

case ISD::CONCAT_VECTORS:
return SelectConcatVector(N);

case ISD::ATOMIC_LOAD:
if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64)
return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_LOAD_I64);
else
break;

case ISD::ATOMIC_LOAD_ADD:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_ADD_I8,
ARM::ATOMIC_LOAD_ADD_I16,
ARM::ATOMIC_LOAD_ADD_I32,
ARM::ATOMIC_LOAD_ADD_I64);
case ISD::ATOMIC_LOAD_SUB:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_SUB_I8,
ARM::ATOMIC_LOAD_SUB_I16,
ARM::ATOMIC_LOAD_SUB_I32,
ARM::ATOMIC_LOAD_SUB_I64);
case ISD::ATOMIC_LOAD_AND:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_AND_I8,
ARM::ATOMIC_LOAD_AND_I16,
ARM::ATOMIC_LOAD_AND_I32,
ARM::ATOMIC_LOAD_AND_I64);
case ISD::ATOMIC_LOAD_OR:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_OR_I8,
ARM::ATOMIC_LOAD_OR_I16,
ARM::ATOMIC_LOAD_OR_I32,
ARM::ATOMIC_LOAD_OR_I64);
case ISD::ATOMIC_LOAD_XOR:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_XOR_I8,
ARM::ATOMIC_LOAD_XOR_I16,
ARM::ATOMIC_LOAD_XOR_I32,
ARM::ATOMIC_LOAD_XOR_I64);
case ISD::ATOMIC_LOAD_NAND:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_NAND_I8,
ARM::ATOMIC_LOAD_NAND_I16,
ARM::ATOMIC_LOAD_NAND_I32,
ARM::ATOMIC_LOAD_NAND_I64);
case ISD::ATOMIC_LOAD_MIN:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_MIN_I8,
ARM::ATOMIC_LOAD_MIN_I16,
ARM::ATOMIC_LOAD_MIN_I32,
ARM::ATOMIC_LOAD_MIN_I64);
case ISD::ATOMIC_LOAD_MAX:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_MAX_I8,
ARM::ATOMIC_LOAD_MAX_I16,
ARM::ATOMIC_LOAD_MAX_I32,
ARM::ATOMIC_LOAD_MAX_I64);
case ISD::ATOMIC_LOAD_UMIN:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_UMIN_I8,
ARM::ATOMIC_LOAD_UMIN_I16,
ARM::ATOMIC_LOAD_UMIN_I32,
ARM::ATOMIC_LOAD_UMIN_I64);
case ISD::ATOMIC_LOAD_UMAX:
return SelectAtomic(N,
ARM::ATOMIC_LOAD_UMAX_I8,
ARM::ATOMIC_LOAD_UMAX_I16,
ARM::ATOMIC_LOAD_UMAX_I32,
ARM::ATOMIC_LOAD_UMAX_I64);
case ISD::ATOMIC_SWAP:
return SelectAtomic(N,
ARM::ATOMIC_SWAP_I8,
ARM::ATOMIC_SWAP_I16,
ARM::ATOMIC_SWAP_I32,
ARM::ATOMIC_SWAP_I64);
case ISD::ATOMIC_CMP_SWAP:
return SelectAtomic(N,
ARM::ATOMIC_CMP_SWAP_I8,
ARM::ATOMIC_CMP_SWAP_I16,
ARM::ATOMIC_CMP_SWAP_I32,
ARM::ATOMIC_CMP_SWAP_I64);
}

return SelectCode(N);
750 changes: 3 additions & 747 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp

Large diffs are not rendered by default.

23 changes: 0 additions & 23 deletions llvm/lib/Target/ARM/ARMISelLowering.h
@@ -548,29 +548,6 @@ namespace llvm {

SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;

MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size) const;
MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size,
unsigned BinOpcode) const;
MachineBasicBlock *EmitAtomicBinary64(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Op1,
unsigned Op2,
bool NeedsCarry = false,
bool IsCmpxchg = false,
bool IsMinMax = false,
ARMCC::CondCodes CC = ARMCC::AL) const;
MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size,
bool signExtend,
ARMCC::CondCodes Cond) const;
MachineBasicBlock *EmitAtomicLoad64(MachineInstr *MI,
MachineBasicBlock *BB) const;

void SetupEntryBlockForSjLj(MachineInstr *MI,
MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB, int FI) const;
203 changes: 0 additions & 203 deletions llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -4336,209 +4336,6 @@ let usesCustomInserter = 1, Defs = [CPSR] in {
// Pseudo instruction that combines movs + predicated rsbmi
// to implement integer ABS
def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;

// Atomic pseudo-insts which will be lowered to ldrex/strex loops.
// (64-bit pseudos use a hand-written selection code).
let mayLoad = 1, mayStore = 1 in {
def ATOMIC_LOAD_ADD_I8 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_SUB_I8 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_AND_I8 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_OR_I8 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_XOR_I8 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_NAND_I8 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_MIN_I8 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$val, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_MAX_I8 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$val, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$val, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$val, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_SWAP_I8 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$new, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_CMP_SWAP_I8 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_ADD_I16 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_SUB_I16 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_AND_I16 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_OR_I16 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_XOR_I16 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_NAND_I16 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_MIN_I16 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$val, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_MAX_I16 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$val, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$val, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$val, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_SWAP_I16 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$new, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_CMP_SWAP_I16 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_ADD_I32 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_SUB_I32 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_AND_I32 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_OR_I32 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_XOR_I32 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_NAND_I32 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_MIN_I32 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$val, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_MAX_I32 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$val, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$val, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$val, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_SWAP_I32 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$new, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_CMP_SWAP_I32 : PseudoInst<
(outs GPR:$dst),
(ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_ADD_I64 : PseudoInst<
(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_SUB_I64 : PseudoInst<
(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_AND_I64 : PseudoInst<
(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_OR_I64 : PseudoInst<
(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_XOR_I64 : PseudoInst<
(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_NAND_I64 : PseudoInst<
(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_MIN_I64 : PseudoInst<
(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_MAX_I64 : PseudoInst<
(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_UMIN_I64 : PseudoInst<
(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_LOAD_UMAX_I64 : PseudoInst<
(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_SWAP_I64 : PseudoInst<
(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
NoItinerary, []>;
def ATOMIC_CMP_SWAP_I64 : PseudoInst<
(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
GPR:$set1, GPR:$set2, i32imm:$ordering),
NoItinerary, []>;
}
let mayLoad = 1 in
def ATOMIC_LOAD_I64 : PseudoInst<
(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, i32imm:$ordering),
NoItinerary, []>;
}

let usesCustomInserter = 1 in {
4 changes: 4 additions & 0 deletions llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -226,6 +226,10 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
}

bool ARMPassConfig::addPreISel() {
const ARMSubtarget *Subtarget = &getARMSubtarget();
if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only())
addPass(createARMAtomicExpandPass(TM));

if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createGlobalMergePass(TM));

1 change: 1 addition & 0 deletions llvm/lib/Target/ARM/CMakeLists.txt
@@ -17,6 +17,7 @@ add_public_tablegen_target(ARMCommonTableGen)
add_llvm_target(ARMCodeGen
A15SDOptimizer.cpp
ARMAsmPrinter.cpp
ARMAtomicExpandPass.cpp
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
ARMCodeEmitter.cpp
155 changes: 114 additions & 41 deletions llvm/test/CodeGen/ARM/atomic-64bit.ll
@@ -55,8 +55,8 @@ define i64 @test3(i64* %ptr, i64 %val) {
; CHECK-LABEL: test3:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
; CHECK: and [[REG3:(r[0-9]?[02468])]], [[REG1]]
; CHECK: and [[REG4:(r[0-9]?[13579])]], [[REG2]]
; CHECK-DAG: and [[REG3:(r[0-9]?[02468])]], [[REG1]]
; CHECK-DAG: and [[REG4:(r[0-9]?[13579])]], [[REG2]]
; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
@@ -65,8 +65,8 @@ define i64 @test3(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test3:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: and.w [[REG3:[a-z0-9]+]], [[REG1]]
; CHECK-THUMB: and.w [[REG4:[a-z0-9]+]], [[REG2]]
; CHECK-THUMB-DAG: and.w [[REG3:[a-z0-9]+]], [[REG1]]
; CHECK-THUMB-DAG: and.w [[REG4:[a-z0-9]+]], [[REG2]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -80,8 +80,8 @@ define i64 @test4(i64* %ptr, i64 %val) {
; CHECK-LABEL: test4:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
; CHECK: orr [[REG3:(r[0-9]?[02468])]], [[REG1]]
; CHECK: orr [[REG4:(r[0-9]?[13579])]], [[REG2]]
; CHECK-DAG: orr [[REG3:(r[0-9]?[02468])]], [[REG1]]
; CHECK-DAG: orr [[REG4:(r[0-9]?[13579])]], [[REG2]]
; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
@@ -90,8 +90,8 @@ define i64 @test4(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test4:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: orr.w [[REG3:[a-z0-9]+]], [[REG1]]
; CHECK-THUMB: orr.w [[REG4:[a-z0-9]+]], [[REG2]]
; CHECK-THUMB-DAG: orr.w [[REG3:[a-z0-9]+]], [[REG1]]
; CHECK-THUMB-DAG: orr.w [[REG4:[a-z0-9]+]], [[REG2]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -105,8 +105,8 @@ define i64 @test5(i64* %ptr, i64 %val) {
; CHECK-LABEL: test5:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
; CHECK: eor [[REG3:(r[0-9]?[02468])]], [[REG1]]
; CHECK: eor [[REG4:(r[0-9]?[13579])]], [[REG2]]
; CHECK-DAG: eor [[REG3:(r[0-9]?[02468])]], [[REG1]]
; CHECK-DAG: eor [[REG4:(r[0-9]?[13579])]], [[REG2]]
; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
@@ -115,8 +115,8 @@ define i64 @test5(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test5:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: eor.w [[REG3:[a-z0-9]+]], [[REG1]]
; CHECK-THUMB: eor.w [[REG4:[a-z0-9]+]], [[REG2]]
; CHECK-THUMB-DAG: eor.w [[REG3:[a-z0-9]+]], [[REG1]]
; CHECK-THUMB-DAG: eor.w [[REG4:[a-z0-9]+]], [[REG2]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -151,8 +151,9 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
; CHECK-LABEL: test7:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
; CHECK: cmp [[REG1]]
; CHECK: cmpeq [[REG2]]
; CHECK-DAG: eor [[MISMATCH_LO:r[0-9]+]], [[REG1]], r1
; CHECK-DAG: eor [[MISMATCH_HI:r[0-9]+]], [[REG2]], r2
; CHECK: orrs {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
; CHECK: bne
; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
; CHECK: cmp
@@ -162,9 +163,9 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
; CHECK-THUMB-LABEL: test7:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: cmp [[REG1]]
; CHECK-THUMB: it eq
; CHECK-THUMB: cmpeq [[REG2]]
; CHECK-THUMB-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG1]], r2
; CHECK-THUMB-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG2]], r3
; CHECK-THUMB: orrs [[MISMATCH_HI]], [[MISMATCH_LO]]
; CHECK-THUMB: bne
; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
; CHECK-THUMB: cmp
@@ -216,9 +217,18 @@ define i64 @test10(i64* %ptr, i64 %val) {
; CHECK-LABEL: test10:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
; CHECK: blt
; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0
; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0
; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2
; CHECK: cmp [[REG1]], r1
; CHECK: movwls [[CARRY_LO]], #1
; CHECK: cmp [[REG2]], r2
; CHECK: movwle [[CARRY_HI]], #1
; CHECK: moveq [[CARRY_HI]], [[CARRY_LO]]
; CHECK: cmp [[CARRY_HI]], #0
; CHECK: movne [[OUT_HI]], [[REG2]]
; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1
; CHECK: movne [[OUT_LO]], [[REG1]]
; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
; CHECK: cmp
; CHECK: bne
@@ -227,9 +237,18 @@ define i64 @test10(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test10:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
; CHECK-THUMB: blt
; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0
; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0
; CHECK-THUMB: cmp [[REG1]], r2
; CHECK-THUMB: movls.w [[CARRY_LO]], #1
; CHECK-THUMB: cmp [[REG2]], r3
; CHECK-THUMB: movle [[CARRY_HI]], #1
; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]]
; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
; CHECK-THUMB: cmp [[CARRY_HI]], #0
; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2
; CHECK-THUMB: movne [[OUT_HI]], [[REG2]]
; CHECK-THUMB: movne [[OUT_LO]], [[REG1]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -243,9 +262,18 @@ define i64 @test11(i64* %ptr, i64 %val) {
; CHECK-LABEL: test11:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
; CHECK: blo
; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0
; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0
; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2
; CHECK: cmp [[REG1]], r1
; CHECK: movwls [[CARRY_LO]], #1
; CHECK: cmp [[REG2]], r2
; CHECK: movwls [[CARRY_HI]], #1
; CHECK: moveq [[CARRY_HI]], [[CARRY_LO]]
; CHECK: cmp [[CARRY_HI]], #0
; CHECK: movne [[OUT_HI]], [[REG2]]
; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1
; CHECK: movne [[OUT_LO]], [[REG1]]
; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
; CHECK: cmp
; CHECK: bne
@@ -255,9 +283,18 @@ define i64 @test11(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test11:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
; CHECK-THUMB: blo
; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0
; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0
; CHECK-THUMB: cmp [[REG1]], r2
; CHECK-THUMB: movls.w [[CARRY_LO]], #1
; CHECK-THUMB: cmp [[REG2]], r3
; CHECK-THUMB: movls [[CARRY_HI]], #1
; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]]
; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
; CHECK-THUMB: cmp [[CARRY_HI]], #0
; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2
; CHECK-THUMB: movne [[OUT_HI]], [[REG2]]
; CHECK-THUMB: movne [[OUT_LO]], [[REG1]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -271,9 +308,18 @@ define i64 @test12(i64* %ptr, i64 %val) {
; CHECK-LABEL: test12:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
; CHECK: bge
; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0
; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0
; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2
; CHECK: cmp [[REG1]], r1
; CHECK: movwhi [[CARRY_LO]], #1
; CHECK: cmp [[REG2]], r2
; CHECK: movwgt [[CARRY_HI]], #1
; CHECK: moveq [[CARRY_HI]], [[CARRY_LO]]
; CHECK: cmp [[CARRY_HI]], #0
; CHECK: movne [[OUT_HI]], [[REG2]]
; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1
; CHECK: movne [[OUT_LO]], [[REG1]]
; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
; CHECK: cmp
; CHECK: bne
@@ -282,9 +328,18 @@ define i64 @test12(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test12:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
; CHECK-THUMB: bge
; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0
; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0
; CHECK-THUMB: cmp [[REG1]], r2
; CHECK-THUMB: movhi.w [[CARRY_LO]], #1
; CHECK-THUMB: cmp [[REG2]], r3
; CHECK-THUMB: movgt [[CARRY_HI]], #1
; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]]
; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
; CHECK-THUMB: cmp [[CARRY_HI]], #0
; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2
; CHECK-THUMB: movne [[OUT_HI]], [[REG2]]
; CHECK-THUMB: movne [[OUT_LO]], [[REG1]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -298,9 +353,18 @@ define i64 @test13(i64* %ptr, i64 %val) {
; CHECK-LABEL: test13:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
; CHECK: bhs
; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0
; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0
; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2
; CHECK: cmp [[REG1]], r1
; CHECK: movwhi [[CARRY_LO]], #1
; CHECK: cmp [[REG2]], r2
; CHECK: movwhi [[CARRY_HI]], #1
; CHECK: moveq [[CARRY_HI]], [[CARRY_LO]]
; CHECK: cmp [[CARRY_HI]], #0
; CHECK: movne [[OUT_HI]], [[REG2]]
; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1
; CHECK: movne [[OUT_LO]], [[REG1]]
; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
; CHECK: cmp
; CHECK: bne
@@ -309,9 +373,18 @@ define i64 @test13(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test13:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
; CHECK-THUMB: bhs
; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0
; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0
; CHECK-THUMB: cmp [[REG1]], r2
; CHECK-THUMB: movhi.w [[CARRY_LO]], #1
; CHECK-THUMB: cmp [[REG2]], r3
; CHECK-THUMB: movhi [[CARRY_HI]], #1
; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]]
; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
; CHECK-THUMB: cmp [[CARRY_HI]], #0
; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2
; CHECK-THUMB: movne [[OUT_HI]], [[REG2]]
; CHECK-THUMB: movne [[OUT_LO]], [[REG1]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
420 changes: 229 additions & 191 deletions llvm/test/CodeGen/ARM/atomic-ops-v8.ll

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/test/CodeGen/ARM/atomicrmw_minmax.ll
@@ -15,7 +15,7 @@ define i32 @min(i8 %ctx, i32* %ptr, i32 %val)
{
; CHECK: ldrex
; CHECK: cmp [[old:r[0-9]*]], [[val:r[0-9]*]]
; CHECK: movlo {{r[0-9]*}}, [[old]]
; CHECK: movls {{r[0-9]*}}, [[old]]
%old = atomicrmw umin i32* %ptr, i32 %val monotonic
ret i32 %old
}