Skip to content

Commit

Permalink
[llvm][SVE] Addressing mode for FF/NF loads.
Browse files Browse the repository at this point in the history
Summary:
This patch adds addressing mode computation for the following SVE
first-faulting (ldff1) and non-faulting (ldnf1) load instructions:

* ldff1{s}<T1> { <Zt>.<T2> }, <Pg>/Z, [<Xn|SP>{, <Xm>{, lsl #imm}}]
* ldnf1{s}<T1> { <Zt>.<T2> }, <Pg>/Z, [<Xn|SP>{, #<imm>, mul vl}]

Reviewers: andwar, sdesmalen, rengolin, efriedma

Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D76209
  • Loading branch information
Francesco Petrogalli committed Mar 18, 2020
1 parent 4788ca4 commit 9bdcd9b
Show file tree
Hide file tree
Showing 4 changed files with 508 additions and 22 deletions.
10 changes: 10 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
Expand Up @@ -4509,6 +4509,16 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
return cast<MemSDNode>(Root)->getMemoryVT();

const unsigned Opcode = Root->getOpcode();
// For custom ISD nodes, we have to look at them individually to extract the
// type of the data moved to/from memory.
switch (Opcode) {
case AArch64ISD::LDNF1:
case AArch64ISD::LDNF1S:
return cast<VTSDNode>(Root->getOperand(3))->getVT();
default:
break;
}

if (Opcode != ISD::INTRINSIC_VOID)
return EVT();

Expand Down
55 changes: 33 additions & 22 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Expand Up @@ -1538,6 +1538,12 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm Pat_Load_P2 : unpred_load_predicate<nxv2i1, LDR_PXI>;

multiclass ldnf1<Instruction I, ValueType Ty, SDPatternOperator Load, ValueType PredTy, ValueType MemVT> {
// scalar + immediate (mul vl)
let AddedComplexity = 1 in {
def : Pat<(Ty (Load (PredTy PPR:$gp), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset), MemVT)),
(I PPR:$gp, GPR64sp:$base, simm4s1:$offset)>;
}

// base
def : Pat<(Ty (Load (PredTy PPR:$gp), GPR64:$base, MemVT)),
(I PPR:$gp, GPR64sp:$base, (i64 0))>;
Expand Down Expand Up @@ -1570,40 +1576,45 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
// 16-element contiguous non-faulting loads
defm : ldnf1<LDNF1B_IMM, nxv16i8, AArch64ldnf1, nxv16i1, nxv16i8>;

multiclass ldff1<Instruction I, ValueType Ty, SDPatternOperator Load, ValueType PredTy, ValueType MemVT> {
// Add more complex addressing modes here as required.
// Selection patterns that map the load node `Load` onto instruction `I`.
//   I      - instruction emitted by both patterns.
//   Ty     - type of the value produced by the matched node.
//   Load   - SelectionDAG operator being matched.
//   PredTy - type of the governing predicate operand ($gp).
//   MemVT  - memory value type carried as the last operand of `Load`.
//   AddrCP - complex pattern that splits the address into $base and $offset
//            registers. The instantiations visible in this file pass
//            am_sve_regreg_lsl0/1/2/3 for 8/16/32/64-bit MemVT elements.
multiclass ldff1<Instruction I, ValueType Ty, SDPatternOperator Load, ValueType PredTy, ValueType MemVT, ComplexPattern AddrCP> {
// reg + reg
// NOTE(review): AddedComplexity = 1 presumably makes this form win over the
// base-only pattern that follows when both could match -- confirm.
let AddedComplexity = 1 in {
def : Pat<(Ty (Load (PredTy PPR:$gp), (AddrCP GPR64:$base, GPR64:$offset), MemVT)),
(I PPR:$gp, GPR64sp:$base, GPR64:$offset)>;
}

// Base
// Address is a single register; XZR is supplied as the offset register operand.
def : Pat<(Ty (Load (PredTy PPR:$gp), GPR64:$base, MemVT)),
(I PPR:$gp, GPR64sp:$base, XZR)>;
}

// 2-element contiguous first faulting loads
defm : ldff1<LDFF1B_D, nxv2i64, AArch64ldff1, nxv2i1, nxv2i8>;
defm : ldff1<LDFF1SB_D, nxv2i64, AArch64ldff1s, nxv2i1, nxv2i8>;
defm : ldff1<LDFF1H_D, nxv2i64, AArch64ldff1, nxv2i1, nxv2i16>;
defm : ldff1<LDFF1SH_D, nxv2i64, AArch64ldff1s, nxv2i1, nxv2i16>;
defm : ldff1<LDFF1W_D, nxv2i64, AArch64ldff1, nxv2i1, nxv2i32>;
defm : ldff1<LDFF1SW_D, nxv2i64, AArch64ldff1s, nxv2i1, nxv2i32>;
defm : ldff1<LDFF1D, nxv2i64, AArch64ldff1, nxv2i1, nxv2i64>;
defm : ldff1<LDFF1W_D, nxv2f32, AArch64ldff1, nxv2i1, nxv2f32>;
defm : ldff1<LDFF1D, nxv2f64, AArch64ldff1, nxv2i1, nxv2f64>;
defm : ldff1<LDFF1B_D, nxv2i64, AArch64ldff1, nxv2i1, nxv2i8, am_sve_regreg_lsl0>;
defm : ldff1<LDFF1SB_D, nxv2i64, AArch64ldff1s, nxv2i1, nxv2i8, am_sve_regreg_lsl0>;
defm : ldff1<LDFF1H_D, nxv2i64, AArch64ldff1, nxv2i1, nxv2i16, am_sve_regreg_lsl1>;
defm : ldff1<LDFF1SH_D, nxv2i64, AArch64ldff1s, nxv2i1, nxv2i16, am_sve_regreg_lsl1>;
defm : ldff1<LDFF1W_D, nxv2i64, AArch64ldff1, nxv2i1, nxv2i32, am_sve_regreg_lsl2>;
defm : ldff1<LDFF1SW_D, nxv2i64, AArch64ldff1s, nxv2i1, nxv2i32, am_sve_regreg_lsl2>;
defm : ldff1<LDFF1D, nxv2i64, AArch64ldff1, nxv2i1, nxv2i64, am_sve_regreg_lsl3>;
defm : ldff1<LDFF1W_D, nxv2f32, AArch64ldff1, nxv2i1, nxv2f32, am_sve_regreg_lsl2>;
defm : ldff1<LDFF1D, nxv2f64, AArch64ldff1, nxv2i1, nxv2f64, am_sve_regreg_lsl3>;

// 4-element contiguous first faulting loads
defm : ldff1<LDFF1B_S, nxv4i32, AArch64ldff1, nxv4i1, nxv4i8>;
defm : ldff1<LDFF1SB_S, nxv4i32, AArch64ldff1s, nxv4i1, nxv4i8>;
defm : ldff1<LDFF1H_S, nxv4i32, AArch64ldff1, nxv4i1, nxv4i16>;
defm : ldff1<LDFF1SH_S, nxv4i32, AArch64ldff1s, nxv4i1, nxv4i16>;
defm : ldff1<LDFF1W, nxv4i32, AArch64ldff1, nxv4i1, nxv4i32>;
defm : ldff1<LDFF1W, nxv4f32, AArch64ldff1, nxv4i1, nxv4f32>;
defm : ldff1<LDFF1B_S, nxv4i32, AArch64ldff1, nxv4i1, nxv4i8, am_sve_regreg_lsl0>;
defm : ldff1<LDFF1SB_S, nxv4i32, AArch64ldff1s, nxv4i1, nxv4i8, am_sve_regreg_lsl0>;
defm : ldff1<LDFF1H_S, nxv4i32, AArch64ldff1, nxv4i1, nxv4i16, am_sve_regreg_lsl1>;
defm : ldff1<LDFF1SH_S, nxv4i32, AArch64ldff1s, nxv4i1, nxv4i16, am_sve_regreg_lsl1>;
defm : ldff1<LDFF1W, nxv4i32, AArch64ldff1, nxv4i1, nxv4i32, am_sve_regreg_lsl2>;
defm : ldff1<LDFF1W, nxv4f32, AArch64ldff1, nxv4i1, nxv4f32, am_sve_regreg_lsl2>;

// 8-element contiguous first faulting loads
defm : ldff1<LDFF1B_H, nxv8i16, AArch64ldff1, nxv8i1, nxv8i8>;
defm : ldff1<LDFF1SB_H, nxv8i16, AArch64ldff1s, nxv8i1, nxv8i8>;
defm : ldff1<LDFF1H, nxv8i16, AArch64ldff1, nxv8i1, nxv8i16>;
defm : ldff1<LDFF1H, nxv8f16, AArch64ldff1, nxv8i1, nxv8f16>;
defm : ldff1<LDFF1B_H, nxv8i16, AArch64ldff1, nxv8i1, nxv8i8, am_sve_regreg_lsl0>;
defm : ldff1<LDFF1SB_H, nxv8i16, AArch64ldff1s, nxv8i1, nxv8i8, am_sve_regreg_lsl0>;
defm : ldff1<LDFF1H, nxv8i16, AArch64ldff1, nxv8i1, nxv8i16, am_sve_regreg_lsl1>;
defm : ldff1<LDFF1H, nxv8f16, AArch64ldff1, nxv8i1, nxv8f16, am_sve_regreg_lsl1>;

// 16-element contiguous first faulting loads
defm : ldff1<LDFF1B, nxv16i8, AArch64ldff1, nxv16i1, nxv16i8>;
defm : ldff1<LDFF1B, nxv16i8, AArch64ldff1, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
}

let Predicates = [HasSVE2] in {
Expand Down

0 comments on commit 9bdcd9b

Please sign in to comment.