45 changes: 45 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,12 @@ def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;

// Memory Tagging Extension tag-store nodes. No results; two pointer operands:
// the tag-source register and the store address. All are chained may-store
// nodes that carry a MachineMemOperand.
def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -1289,13 +1295,52 @@ defm STZG : MemTagStore<0b01, "stzg">;
defm ST2G : MemTagStore<0b10, "st2g">;
defm STZ2G : MemTagStore<0b11, "stz2g">;

// Select the STG/STZG/ST2G/STZ2G nodes into the immediate-offset instruction
// forms when the address is a base register plus a signed, 16-byte-scaled
// immediate (am_indexeds9s128 / simm9s16).
def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (STGOffset $Rn, $Rm, $imm)>;
def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (STZGOffset $Rn, $Rm, $imm)>;
def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (ST2GOffset $Rn, $Rm, $imm)>;
def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
          (STZ2GOffset $Rn, $Rm, $imm)>;

// STGP: store a pair of 64-bit registers and set the memory tag of the
// destination granule. Offset, pre-indexed and post-indexed variants.
defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;

// Direct selection of the stg / stgp intrinsics when the address folds into
// the instruction's scaled signed immediate offset.
def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
          (STGOffset GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;

def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2),
          (STGPi $Rt, $Rt2, $Rn, $imm)>;

// Stack-tagging pseudos, expanded after frame lowering:
// IRGstack computes the function's tagged base pointer; TAGPstack produces a
// tagged address for a stack slot (handled specially in eliminateFrameIndex,
// which rewrites its frame-index operand against the tagged base pointer).
def IRGstack
    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>,
      Sched<[]>;
def TAGPstack
    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>,
      Sched<[]>;

// Explicit SP in the first operand prevents ShrinkWrap optimization
// from leaving this instruction out of the stack frame. When IRGstack
// is transformed into IRG, this operand is replaced with the actual
// register / expression for the tagged base pointer of the current function.
def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;

// Large STG to be expanded into a loop. $Rm is the size, $Rn is start address.
// $Rn_wback is one past the end of the range. The constraint string ties
// $Rm/$Rn to their _wback results and marks both earlyclobber, since the
// expanded loop decrements the size and advances the address in place.
let isCodeGenOnly=1, mayStore=1 in {
def STGloop
    : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >,
      Sched<[WriteAdr, WriteST]>;

// Zeroing variant, selected when lowering llvm.aarch64.settag.zero.
def STZGloop
    : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >,
      Sched<[WriteAdr, WriteST]>;
}

} // Predicates = [HasMTE]

//===----------------------------------------------------------------------===//
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,12 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// ForwardedMustTailRegParms - A list of virtual and physical registers
/// that must be forwarded to every musttail call.
SmallVector<ForwardedRegister, 1> ForwardedMustTailRegParms;

// Offset from SP-at-entry to the tagged base pointer.
// Tagged base pointer is set up to point to the first (lowest address) tagged
// stack slot.
unsigned TaggedBasePointerOffset;

public:
AArch64FunctionInfo() = default;

Expand Down Expand Up @@ -224,6 +230,13 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
return ForwardedMustTailRegParms;
}

  /// Returns the offset from SP-at-entry to the tagged base pointer.
  unsigned getTaggedBasePointerOffset() const {
    return TaggedBasePointerOffset;
  }
  /// Records the offset from SP-at-entry to the tagged base pointer.
  void setTaggedBasePointerOffset(unsigned Offset) {
    TaggedBasePointerOffset = Offset;
  }

private:
// Hold the lists of LOHs.
MILOHContainer LOHContainerSet;
Expand Down
15 changes: 12 additions & 3 deletions llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -468,10 +468,19 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return;
}

// Modify MI as necessary to handle as much of 'Offset' as possible
Offset = TFI->resolveFrameIndexReference(
MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
if (MI.getOpcode() == AArch64::TAGPstack) {
// TAGPstack must use the virtual frame register in its 3rd operand.
const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
FrameReg = MI.getOperand(3).getReg();
Offset =
MFI.getObjectOffset(FrameIndex) + AFI->getTaggedBasePointerOffset();
} else {
Offset = TFI->resolveFrameIndexReference(
MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
}

// Modify MI as necessary to handle as much of 'Offset' as possible
if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
return;

Expand Down
88 changes: 88 additions & 0 deletions llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,91 @@ bool AArch64SelectionDAGInfo::generateFMAsInMachineCombiner(
CodeGenOpt::Level OptLevel) const {
return OptLevel >= CodeGenOpt::Aggressive;
}

static const int kSetTagLoopThreshold = 176;

/// Lower a constant-size llvm.aarch64.settag[.zero] into an unrolled sequence
/// of 2-granule (ST2G/STZ2G) and 1-granule (STG/STZG) tag stores.
/// \p ObjSize is in bytes and must be a multiple of 16 (asserted by the
/// caller); \p ZeroData selects the zeroing opcodes.
static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,
                                  SDValue Chain, SDValue Ptr, uint64_t ObjSize,
                                  const MachineMemOperand *BaseMemOperand,
                                  bool ZeroData) {
  MachineFunction &MF = DAG.getMachineFunction();
  // Work in 16-byte tag-granule units.
  unsigned ObjSizeScaled = ObjSize / 16;

  SDValue TagSrc = Ptr;
  if (Ptr.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Ptr)->getIndex();
    Ptr = DAG.getTargetFrameIndex(FI, MVT::i64);
    // A frame index operand may end up as [SP + offset] => it is fine to use SP
    // register as the tag source.
    TagSrc = DAG.getRegister(AArch64::SP, MVT::i64);
  }

  const unsigned OpCode1 = ZeroData ? AArch64ISD::STZG : AArch64ISD::STG;
  const unsigned OpCode2 = ZeroData ? AArch64ISD::STZ2G : AArch64ISD::ST2G;

  SmallVector<SDValue, 8> OutChains;
  unsigned OffsetScaled = 0;
  while (OffsetScaled < ObjSizeScaled) {
    // Prefer the 2-granule (32-byte) store while at least two granules remain.
    if (ObjSizeScaled - OffsetScaled >= 2) {
      SDValue AddrNode = DAG.getMemBasePlusOffset(Ptr, OffsetScaled * 16, dl);
      SDValue St = DAG.getMemIntrinsicNode(
          OpCode2, dl, DAG.getVTList(MVT::Other),
          {Chain, TagSrc, AddrNode},
          MVT::v4i64, // MemVT covers 32 bytes.
          MF.getMachineMemOperand(BaseMemOperand, OffsetScaled * 16, 16 * 2));
      OffsetScaled += 2;
      OutChains.push_back(St);
      continue;
    }

    // At most one granule left: emit a single 16-byte tag store.
    if (ObjSizeScaled - OffsetScaled > 0) {
      SDValue AddrNode = DAG.getMemBasePlusOffset(Ptr, OffsetScaled * 16, dl);
      SDValue St = DAG.getMemIntrinsicNode(
          OpCode1, dl, DAG.getVTList(MVT::Other),
          {Chain, TagSrc, AddrNode},
          MVT::v2i64, // MemVT covers 16 bytes.
          MF.getMachineMemOperand(BaseMemOperand, OffsetScaled * 16, 16));
      OffsetScaled += 1;
      OutChains.push_back(St);
    }
  }

  // Merge the independent store chains.
  // NOTE(review): assumes ObjSize != 0 so OutChains is non-empty — confirm
  // callers never pass a zero size.
  SDValue Res = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
  return Res;
}

/// Lower llvm.aarch64.settag / llvm.aarch64.settag.zero with a constant size.
/// Regions smaller than kSetTagLoopThreshold bytes are fully unrolled;
/// larger ones become an STGloop/STZGloop pseudo that is later expanded into
/// a loop. \p Size must be a ConstantSDNode whose value is a multiple of 16.
SDValue AArch64SelectionDAGInfo::EmitTargetCodeForSetTag(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Addr,
    SDValue Size, MachinePointerInfo DstPtrInfo, bool ZeroData) const {
  uint64_t ObjSize = cast<ConstantSDNode>(Size)->getZExtValue();
  assert(ObjSize % 16 == 0);

  MachineFunction &MF = DAG.getMachineFunction();
  MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand(
      DstPtrInfo, MachineMemOperand::MOStore, ObjSize, 16);

  bool UseSetTagRangeLoop =
      kSetTagLoopThreshold >= 0 && (int)ObjSize >= kSetTagLoopThreshold;
  if (!UseSetTagRangeLoop)
    return EmitUnrolledSetTag(DAG, dl, Chain, Addr, ObjSize, BaseMemOperand,
                              ZeroData);

  // The loop stores 32 bytes per iteration; if the size is not a multiple of
  // 32, peel off one post-indexed single-granule store first so the remainder
  // is 32-byte aligned. The post-increment (scaled constant 1 = 16 bytes)
  // advances Addr past the stored granule.
  if (ObjSize % 32 != 0) {
    SDNode *St1 = DAG.getMachineNode(
        ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex, dl,
        {MVT::i64, MVT::Other},
        {Addr, Addr, DAG.getTargetConstant(1, dl, MVT::i64), Chain});
    DAG.setNodeMemRefs(cast<MachineSDNode>(St1), {BaseMemOperand});
    ObjSize -= 16;
    Addr = SDValue(St1, 0);
    Chain = SDValue(St1, 1);
  }

  // STGloop produces the written-back size and address registers plus a
  // chain; only the chain (result 2) is returned to the caller.
  const EVT ResTys[] = {MVT::i64, MVT::i64, MVT::Other};
  SDValue Ops[] = {DAG.getConstant(ObjSize, dl, MVT::i64), Addr, Chain};
  SDNode *St = DAG.getMachineNode(
      ZeroData ? AArch64::STZGloop : AArch64::STGloop, dl, ResTys, Ops);

  DAG.setNodeMemRefs(cast<MachineSDNode>(St), {BaseMemOperand});
  return SDValue(St, 2);
}
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ class AArch64SelectionDAGInfo : public SelectionDAGTargetInfo {
SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, unsigned Align, bool isVolatile,
MachinePointerInfo DstPtrInfo) const override;
  /// Custom lowering for llvm.aarch64.settag[.zero] (MTE memory tagging):
  /// emits unrolled STG/ST2G tag stores or an STGloop/STZGloop pseudo
  /// depending on the constant size. Op1 is the destination address, Op2 the
  /// size in bytes.
  SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
                                  SDValue Chain, SDValue Op1, SDValue Op2,
                                  MachinePointerInfo DstPtrInfo,
                                  bool ZeroData) const override;
bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const override;
};
}
Expand Down
16 changes: 16 additions & 0 deletions llvm/test/Analysis/ValueTracking/aarch64.irg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,22 @@ entry:
ret void
}

; CHECK-LABEL: define void @checkNonnullTagp(
; tagp of a nonnull pointer (an alloca) is itself known nonnull, and the
; property propagates through a chain of tagp calls.
define void @checkNonnullTagp(i8* %tag) {
; CHECK: %[[p:.*]] = call i8* @llvm.aarch64.tagp.p0i8(i8* nonnull %a, i8* %tag, i64 1)
; CHECK: %[[p2:.*]] = call i8* @llvm.aarch64.tagp.p0i8(i8* nonnull %[[p]], i8* %tag, i64 2)
; CHECK: call void @use(i8* nonnull %[[p2]])
entry:
  %a = alloca i8, align 8

  %p = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %tag, i64 1)
  %p2 = call i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 2)
  call void @use(i8* %p2)

  ret void
}

declare i8* @llvm.aarch64.irg(i8*, i64)
declare i8* @llvm.aarch64.tagp.p0i8(i8*, i8*, i64)

declare void @use(i8*)
42 changes: 42 additions & 0 deletions llvm/test/CodeGen/AArch64/irg.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s

; Non-zero exclusion mask: materialized into a register for irg's third
; operand.
define i8* @irg_imm16(i8* %p) {
entry:
; CHECK-LABEL: irg_imm16:
; CHECK: mov w[[R:[0-9]+]], #16
; CHECK: irg x0, x0, x[[R]]
; CHECK: ret
  %q = call i8* @llvm.aarch64.irg(i8* %p, i64 16)
  ret i8* %q
}

; Zero exclusion mask selects the two-operand form of irg.
define i8* @irg_imm0(i8* %p) {
entry:
; CHECK-LABEL: irg_imm0:
; CHECK: irg x0, x0{{$}}
; CHECK: ret
  %q = call i8* @llvm.aarch64.irg(i8* %p, i64 0)
  ret i8* %q
}

define i8* @irg_reg(i8* %p, i64 %ex) {
entry:
; CHECK-LABEL: irg_reg:
; CHECK: irg x0, x0, x1
; CHECK: ret
  %q = call i8* @llvm.aarch64.irg(i8* %p, i64 %ex)
  ret i8* %q
}

; llvm.aarch64.irg.sp reads its tag source from SP.
define i8* @irg_sp() {
entry:
; CHECK-LABEL: irg_sp:
; CHECK: irg x0, sp{{$}}
; CHECK: ret
  %q = call i8* @llvm.aarch64.irg.sp(i64 0)
  ret i8* %q
}

declare i8* @llvm.aarch64.irg(i8* %p, i64 %exclude)
declare i8* @llvm.aarch64.irg.sp(i64 %exclude)
93 changes: 93 additions & 0 deletions llvm/test/CodeGen/AArch64/irg_sp_tagp.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s

; Single small alloca: irg from sp, then addg with offset 0 and tag offset 1.
define i8* @small_alloca() {
entry:
; CHECK-LABEL: small_alloca:
; CHECK: irg [[R:x[0-9]+]], sp{{$}}
; CHECK-NEXT: addg x0, [[R]], #0, #1
; CHECK: ret
  %a = alloca i8, align 16
  %q = call i8* @llvm.aarch64.irg.sp(i64 0)
  %q1 = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %q, i64 1)
  ret i8* %q1
}

; Two large allocas. One's offset overflows addg immediate.
define void @huge_allocas() {
entry:
; CHECK-LABEL: huge_allocas:
; CHECK: irg [[R:x[0-9]+]], sp{{$}}
; CHECK: add [[TMP:x[0-9]+]], [[R]], #3088
; CHECK: addg x0, [[TMP]], #1008, #1
; CHECK: addg x1, [[R]], #0, #2
; CHECK: bl use2
  %a = alloca i8, i64 4096, align 16
  %b = alloca i8, i64 4096, align 16
  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
  %a_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %base, i64 1)
  %b_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %b, i8* %base, i64 2)
  call void @use2(i8* %a_t, i8* %b_t)
  ret void
}

; Realigned stack frame. IRG uses value of SP after realignment,
; ADDG for the first stack allocation has offset 0.
define void @realign() {
entry:
; CHECK-LABEL: realign:
; CHECK: add x29, sp, #16
; CHECK: and sp, x{{[0-9]*}}, #0xffffffffffffffc0
; CHECK: irg [[R:x[0-9]+]], sp{{$}}
; CHECK: addg x0, [[R]], #0, #1
; CHECK: bl use
  %a = alloca i8, i64 4096, align 64
  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
  %a_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %base, i64 1)
  call void @use(i8* %a_t)
  ret void
}

; With a dynamic alloca, IRG has to use FP with non-zero offset.
; ADDG offset for the single static alloca is still zero.
; %b is passed both untagged (x0) and tagged (x1); both use the same FP
; offset OFS.
define void @dynamic_alloca(i64 %size) {
entry:
; CHECK-LABEL: dynamic_alloca:
; CHECK: sub [[R:x[0-9]+]], x29, #[[OFS:[0-9]+]]
; CHECK: irg [[R]], [[R]]
; CHECK: addg x1, [[R]], #0, #1
; CHECK: sub x0, x29, #[[OFS]]
; CHECK: bl use2
  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
  %a = alloca i128, i64 %size, align 16
  %b = alloca i8, i64 16, align 16
  %b_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %b, i8* %base, i64 1)
  call void @use2(i8* %b, i8* %b_t)
  ret void
}

; Both dynamic alloca and realigned frame.
; After initial realignment, generate the base pointer.
; IRG uses the base pointer w/o offset.
; Offsets for tagged and untagged pointers to the same alloca match.
define void @dynamic_alloca_and_realign(i64 %size) {
; Entry label fixed from the typo "entryz" for consistency with every other
; test in this file; label names do not affect codegen.
entry:
; CHECK-LABEL: dynamic_alloca_and_realign:
; CHECK: and sp, x{{.*}}, #0xffffffffffffffc0
; CHECK: mov x19, sp
; CHECK: irg [[R:x[0-9]+]], x19
; CHECK: addg x1, [[R]], #[[OFS:[0-9]+]], #1
; CHECK: add x0, x19, #[[OFS]]
; CHECK: bl use2
  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
  %a = alloca i128, i64 %size, align 64
  %b = alloca i8, i64 16, align 16
  %b_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %b, i8* %base, i64 1)
  call void @use2(i8* %b, i8* %b_t)
  ret void
}

declare void @use(i8*)
declare void @use2(i8*, i8*)

declare i8* @llvm.aarch64.irg.sp(i64 %exclude)
declare i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 %ofs)
138 changes: 138 additions & 0 deletions llvm/test/CodeGen/AArch64/settag.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s

; Lowering of llvm.aarch64.settag[.zero] for constant sizes (multiples of 16).
; Sizes below the loop threshold (176 bytes) are unrolled into stg/st2g;
; larger sizes become a loop.

define void @stg1(i8* %p) {
entry:
; CHECK-LABEL: stg1:
; CHECK: stg x0, [x0]
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 16)
  ret void
}

define void @stg2(i8* %p) {
entry:
; CHECK-LABEL: stg2:
; CHECK: st2g x0, [x0]
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 32)
  ret void
}

; Odd number of granules: one stg for the remainder, st2g for the rest.
define void @stg3(i8* %p) {
entry:
; CHECK-LABEL: stg3:
; CHECK: stg x0, [x0, #32]
; CHECK: st2g x0, [x0]
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 48)
  ret void
}

define void @stg4(i8* %p) {
entry:
; CHECK-LABEL: stg4:
; CHECK: st2g x0, [x0, #32]
; CHECK: st2g x0, [x0]
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 64)
  ret void
}

define void @stg5(i8* %p) {
entry:
; CHECK-LABEL: stg5:
; CHECK: stg x0, [x0, #64]
; CHECK: st2g x0, [x0, #32]
; CHECK: st2g x0, [x0]
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 80)
  ret void
}

; 256 bytes is above the loop threshold: st2g post-indexed loop, 32 bytes
; per iteration.
define void @stg16(i8* %p) {
entry:
; CHECK-LABEL: stg16:
; CHECK: mov {{(w|x)}}[[R:[0-9]+]], #256
; CHECK: st2g x0, [x0], #32
; CHECK: sub x[[R]], x[[R]], #32
; CHECK: cbnz x[[R]],
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 256)
  ret void
}

; 272 = 256 + 16: a single post-indexed stg realigns the size to a multiple
; of 32 before the loop.
define void @stg17(i8* %p) {
entry:
; CHECK-LABEL: stg17:
; CHECK: mov {{(w|x)}}[[R:[0-9]+]], #256
; CHECK: stg x0, [x0], #16
; CHECK: st2g x0, [x0], #32
; CHECK: sub x[[R]], x[[R]], #32
; CHECK: cbnz x[[R]],
; CHECK: ret
  call void @llvm.aarch64.settag(i8* %p, i64 272)
  ret void
}

; Zeroing variants use stzg/stz2g.
define void @stzg3(i8* %p) {
entry:
; CHECK-LABEL: stzg3:
; CHECK: stzg x0, [x0, #32]
; CHECK: stz2g x0, [x0]
; CHECK: ret
  call void @llvm.aarch64.settag.zero(i8* %p, i64 48)
  ret void
}

define void @stzg17(i8* %p) {
entry:
; CHECK-LABEL: stzg17:
; CHECK: mov {{w|x}}[[R:[0-9]+]], #256
; CHECK: stzg x0, [x0], #16
; CHECK: stz2g x0, [x0], #32
; CHECK: sub x[[R]], x[[R]], #32
; CHECK: cbnz x[[R]],
; CHECK: ret
  call void @llvm.aarch64.settag.zero(i8* %p, i64 272)
  ret void
}

; For a frame-index destination, SP itself is used as the tag source.
define void @stg_alloca1() {
entry:
; CHECK-LABEL: stg_alloca1:
; CHECK: stg sp, [sp]
; CHECK: ret
  %a = alloca i8, i32 16, align 16
  call void @llvm.aarch64.settag(i8* %a, i64 16)
  ret void
}

define void @stg_alloca5() {
entry:
; CHECK-LABEL: stg_alloca5:
; CHECK: stg sp, [sp, #64]
; CHECK: st2g sp, [sp, #32]
; CHECK: st2g sp, [sp]
; CHECK: ret
  %a = alloca i8, i32 80, align 16
  call void @llvm.aarch64.settag(i8* %a, i64 80)
  ret void
}

define void @stg_alloca17() {
entry:
; CHECK-LABEL: stg_alloca17:
; CHECK: mov [[P:x[0-9]+]], sp
; CHECK: stg [[P]], {{\[}}[[P]]{{\]}}, #16
; CHECK: mov {{w|x}}[[R:[0-9]+]], #256
; CHECK: st2g [[P]], {{\[}}[[P]]{{\]}}, #32
; CHECK: sub x[[R]], x[[R]], #32
; CHECK: cbnz x[[R]],
; CHECK: ret
  %a = alloca i8, i32 272, align 16
  call void @llvm.aarch64.settag(i8* %a, i64 272)
  ret void
}

declare void @llvm.aarch64.settag(i8* %p, i64 %a)
declare void @llvm.aarch64.settag.zero(i8* %p, i64 %a)
78 changes: 78 additions & 0 deletions llvm/test/CodeGen/AArch64/stgp.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s

; stgp immediate offsets must be multiples of 16 in [-1024, 1008]; anything
; outside that range is materialized into the base register first.

define void @stgp0(i64 %a, i64 %b, i8* %p) {
entry:
; CHECK-LABEL: stgp0:
; CHECK: stgp x0, x1, [x2]
; CHECK: ret
  call void @llvm.aarch64.stgp(i8* %p, i64 %a, i64 %b)
  ret void
}

; 1004 is not a multiple of 16: does not fold.
define void @stgp1004(i64 %a, i64 %b, i8* %p) {
entry:
; CHECK-LABEL: stgp1004:
; CHECK: add [[R:x[0-9]+]], x2, #1004
; CHECK: stgp x0, x1, {{\[}}[[R]]{{\]}}
; CHECK: ret
  %q = getelementptr i8, i8* %p, i32 1004
  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
  ret void
}

; 1008 is the largest positive offset that folds.
define void @stgp1008(i64 %a, i64 %b, i8* %p) {
entry:
; CHECK-LABEL: stgp1008:
; CHECK: stgp x0, x1, [x2, #1008]
; CHECK: ret
  %q = getelementptr i8, i8* %p, i32 1008
  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
  ret void
}

; 1024 exceeds the immediate range: does not fold.
define void @stgp1024(i64 %a, i64 %b, i8* %p) {
entry:
; CHECK-LABEL: stgp1024:
; CHECK: add [[R:x[0-9]+]], x2, #1024
; CHECK: stgp x0, x1, {{\[}}[[R]]{{\]}}
; CHECK: ret
  %q = getelementptr i8, i8* %p, i32 1024
  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
  ret void
}

; -1024 is the most negative offset that folds.
define void @stgp_1024(i64 %a, i64 %b, i8* %p) {
entry:
; CHECK-LABEL: stgp_1024:
; CHECK: stgp x0, x1, [x2, #-1024]
; CHECK: ret
  %q = getelementptr i8, i8* %p, i32 -1024
  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
  ret void
}

define void @stgp_1040(i64 %a, i64 %b, i8* %p) {
entry:
; CHECK-LABEL: stgp_1040:
; CHECK: sub [[R:x[0-9]+]], x2, #1040
; CHECK: stgp x0, x1, [x{{.*}}]
; CHECK: ret
  %q = getelementptr i8, i8* %p, i32 -1040
  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
  ret void
}

define void @stgp_alloca(i64 %a, i64 %b) {
entry:
; CHECK-LABEL: stgp_alloca:
; CHECK: stgp x0, x1, [sp]
; CHECK: stgp x1, x0, [sp, #16]
; CHECK: ret
  %x = alloca i8, i32 32, align 16
  call void @llvm.aarch64.stgp(i8* %x, i64 %a, i64 %b)
  %x1 = getelementptr i8, i8* %x, i32 16
  call void @llvm.aarch64.stgp(i8* %x1, i64 %b, i64 %a)
  ret void
}

declare void @llvm.aarch64.stgp(i8* %p, i64 %a, i64 %b)
41 changes: 41 additions & 0 deletions llvm/test/CodeGen/AArch64/tagp.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s

; llvm.aarch64.tagp lowers to subp (distance between pointers), add (rebase
; onto the tag source) and addg (apply the tag offset).

define i8* @tagp2(i8* %p, i8* %tag) {
entry:
; CHECK-LABEL: tagp2:
; CHECK: subp [[R:x[0-9]+]], x0, x1
; CHECK: add [[R]], [[R]], x1
; CHECK: addg x0, [[R]], #0, #2
; CHECK: ret
  %q = call i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 2)
  ret i8* %q
}

; irg result feeding tagp with an unrelated tag source: both instructions
; are emitted.
define i8* @irg_tagp_unrelated(i8* %p, i8* %q) {
entry:
; CHECK-LABEL: irg_tagp_unrelated:
; CHECK: irg [[R0:x[0-9]+]], x0{{$}}
; CHECK: subp [[R:x[0-9]+]], [[R0]], x1
; CHECK: add [[R]], [[R0]], x1
; CHECK: addg x0, [[R]], #0, #1
; CHECK: ret
  %p1 = call i8* @llvm.aarch64.irg(i8* %p, i64 0)
  %q1 = call i8* @llvm.aarch64.tagp.p0i8(i8* %p1, i8* %q, i64 1)
  ret i8* %q1
}

; alloca address is materialized from sp before the subp/add/addg sequence.
define i8* @tagp_alloca(i8* %tag) {
entry:
; CHECK-LABEL: tagp_alloca:
; CHECK: mov [[R0:x[0-9]+]], sp{{$}}
; CHECK: subp [[R:x[0-9]+]], [[R0]], x0{{$}}
; CHECK: add [[R]], [[R0]], x0{{$}}
; CHECK: addg x0, [[R]], #0, #3
; CHECK: ret
  %a = alloca i8, align 16
  %q = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %tag, i64 3)
  ret i8* %q
}

declare i8* @llvm.aarch64.irg(i8* %p, i64 %exclude)
declare i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 %ofs)