Skip to content

Commit

Permalink
[AArch64] Generate the CASP instruction for 128-bit cmpxchg
Browse files Browse the repository at this point in the history
The Large System Extension added an atomic compare-and-swap instruction
that operates on a pair of 64-bit registers, which we can use to
implement a 128-bit cmpxchg.

Because i128 is not a legal type for AArch64 we have to do all of the
instruction selection in C++, and the instruction requires even/odd
register pairs, so we have to wrap it in REG_SEQUENCE and EXTRACT_SUBREG
nodes. This is very similar to what we do for 64-bit cmpxchg in the ARM
backend.

Differential revision: https://reviews.llvm.org/D42104

llvm-svn: 323634
  • Loading branch information
ostannard committed Jan 29, 2018
1 parent eaf5172 commit a9d2e00
Show file tree
Hide file tree
Showing 3 changed files with 153 additions and 3 deletions.
74 changes: 71 additions & 3 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Expand Up @@ -10655,11 +10655,79 @@ static std::pair<SDValue, SDValue> splitInt128(SDValue N, SelectionDAG &DAG) {
return std::make_pair(Lo, Hi);
}

// Create an even/odd pair of X registers holding integer value V.
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
SDLoc dl(V.getNode());
SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i64);
SDValue VHi = DAG.getAnyExtOrTrunc(
DAG.getNode(ISD::SRL, dl, MVT::i128, V, DAG.getConstant(64, dl, MVT::i64)),
dl, MVT::i64);
if (DAG.getDataLayout().isBigEndian())
std::swap (VLo, VHi);
SDValue RegClass =
DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32);
SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32);
SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32);
const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
return SDValue(
DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
}

static void ReplaceCMP_SWAP_128Results(SDNode *N,
SmallVectorImpl<SDValue> & Results,
SelectionDAG &DAG) {
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
assert(N->getValueType(0) == MVT::i128 &&
"AtomicCmpSwap on types less than 128 should be legal");

if (Subtarget->hasLSE()) {
// LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
// so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.
SDValue Ops[] = {
createGPRPairNode(DAG, N->getOperand(2)), // Compare value
createGPRPairNode(DAG, N->getOperand(3)), // Store value
N->getOperand(1), // Ptr
N->getOperand(0), // Chain in
};

MachineFunction &MF = DAG.getMachineFunction();
MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
MemOp[0] = cast<MemSDNode>(N)->getMemOperand();

unsigned Opcode;
switch (MemOp[0]->getOrdering()) {
case AtomicOrdering::Monotonic:
Opcode = AArch64::CASPX;
break;
case AtomicOrdering::Acquire:
Opcode = AArch64::CASPAX;
break;
case AtomicOrdering::Release:
Opcode = AArch64::CASPLX;
break;
case AtomicOrdering::AcquireRelease:
case AtomicOrdering::SequentiallyConsistent:
Opcode = AArch64::CASPALX;
break;
default:
llvm_unreachable("Unexpected ordering!");
}

MachineSDNode *CmpSwap = DAG.getMachineNode(
Opcode, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
CmpSwap->setMemRefs(MemOp, MemOp + 1);

unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
if (DAG.getDataLayout().isBigEndian())
std::swap(SubReg1, SubReg2);
Results.push_back(DAG.getTargetExtractSubreg(SubReg1, SDLoc(N), MVT::i64,
SDValue(CmpSwap, 0)));
Results.push_back(DAG.getTargetExtractSubreg(SubReg2, SDLoc(N), MVT::i64,
SDValue(CmpSwap, 0)));
Results.push_back(SDValue(CmpSwap, 1)); // Chain out
return;
}

auto Desired = splitInt128(N->getOperand(2), DAG);
auto New = splitInt128(N->getOperand(3), DAG);
SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
Expand Down Expand Up @@ -10718,7 +10786,7 @@ void AArch64TargetLowering::ReplaceNodeResults(
// Let normal code take care of it by not adding anything to Results.
return;
case ISD::ATOMIC_CMP_SWAP:
ReplaceCMP_SWAP_128Results(N, Results, DAG);
ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
return;
}
}
Expand Down
20 changes: 20 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
Expand Up @@ -2592,6 +2592,16 @@ void AArch64InstrInfo::storeRegToStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Twov1d;
Offset = false;
} else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, MBBI, DL, get(AArch64::STPXi))
.addReg(TRI->getSubReg(SrcReg, AArch64::sube64),
getKillRegState(isKill))
.addReg(TRI->getSubReg(SrcReg, AArch64::subo64),
getKillRegState(isKill))
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
return;
}
break;
case 24:
Expand Down Expand Up @@ -2690,6 +2700,16 @@ void AArch64InstrInfo::loadRegFromStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Twov1d;
Offset = false;
} else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, MBBI, DL, get(AArch64::LDPXi))
.addReg(TRI->getSubReg(DestReg, AArch64::sube64),
getDefRegState(true))
.addReg(TRI->getSubReg(DestReg, AArch64::subo64),
getDefRegState(true))
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
return;
}
break;
case 24:
Expand Down
62 changes: 62 additions & 0 deletions llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
@@ -1,4 +1,5 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse < %s | FileCheck %s
; RUN: llc -mtriple=aarch64_be-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse < %s | FileCheck %s --check-prefix=CHECK-REG
; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mcpu=saphira < %s | FileCheck %s

Expand All @@ -11,6 +12,7 @@
@var16 = global i16 0
@var32 = global i32 0
@var64 = global i64 0
@var128 = global i128 0

define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_add_i8:
Expand Down Expand Up @@ -713,6 +715,21 @@ define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
ret i64 %old
}

define i128 @test_atomic_cmpxchg_i128(i128 %wanted, i128 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i128:
%pair = cmpxchg i128* @var128, i128 %wanted, i128 %new acquire acquire
%old = extractvalue { i128, i1 } %pair, 0

; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var128
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128

; CHECK: caspa x0, x1, x2, x3, [x[[ADDR]]]
; CHECK-NOT: dmb

ret i128 %old
}

define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_sub_i8:
%old = atomicrmw sub i8* @var8, i8 %offset seq_cst
Expand Down Expand Up @@ -1705,6 +1722,21 @@ define i64 @test_atomic_cmpxchg_i64_acquire(i64 %wanted, i64 %new) nounwind {
ret i64 %old
}

define i128 @test_atomic_cmpxchg_i128_acquire(i128 %wanted, i128 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i128_acquire:
%pair = cmpxchg i128* @var128, i128 %wanted, i128 %new acquire acquire
%old = extractvalue { i128, i1 } %pair, 0

; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var128
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128

; CHECK: caspa x0, x1, x2, x3, [x[[ADDR]]]
; CHECK-NOT: dmb

ret i128 %old
}

define i8 @test_atomic_cmpxchg_i8_monotonic(i8 %wanted, i8 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i8_monotonic:
%pair = cmpxchg i8* @var8, i8 %wanted, i8 %new monotonic monotonic
Expand Down Expand Up @@ -1765,6 +1797,21 @@ define i64 @test_atomic_cmpxchg_i64_monotonic(i64 %wanted, i64 %new) nounwind {
ret i64 %old
}

define i128 @test_atomic_cmpxchg_i128_monotonic(i128 %wanted, i128 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i128_monotonic:
%pair = cmpxchg i128* @var128, i128 %wanted, i128 %new monotonic monotonic
%old = extractvalue { i128, i1 } %pair, 0

; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var128
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128

; CHECK: casp x0, x1, x2, x3, [x[[ADDR]]]
; CHECK-NOT: dmb

ret i128 %old
}

define i8 @test_atomic_cmpxchg_i8_seq_cst(i8 %wanted, i8 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i8_seq_cst:
%pair = cmpxchg i8* @var8, i8 %wanted, i8 %new seq_cst seq_cst
Expand Down Expand Up @@ -1825,6 +1872,21 @@ define i64 @test_atomic_cmpxchg_i64_seq_cst(i64 %wanted, i64 %new) nounwind {
ret i64 %old
}

define i128 @test_atomic_cmpxchg_i128_seq_cst(i128 %wanted, i128 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i128_seq_cst:
%pair = cmpxchg i128* @var128, i128 %wanted, i128 %new seq_cst seq_cst
%old = extractvalue { i128, i1 } %pair, 0

; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var128
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128

; CHECK: caspal x0, x1, x2, x3, [x[[ADDR]]]
; CHECK-NOT: dmb

ret i128 %old
}

define i8 @test_atomic_load_max_i8_acq_rel(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i8_acq_rel:
%old = atomicrmw max i8* @var8, i8 %offset acq_rel
Expand Down

0 comments on commit a9d2e00

Please sign in to comment.