[AArch64][SVE] Add intrinsics for non-temporal gather-loads/scatter-stores

Summary:
This patch adds the following LLVM IR intrinsics for SVE:
1. non-temporal gather loads
  * @llvm.aarch64.sve.ldnt1.gather
  * @llvm.aarch64.sve.ldnt1.gather.uxtw
  * @llvm.aarch64.sve.ldnt1.gather.scalar.offset
2. non-temporal scatter stores
  * @llvm.aarch64.sve.stnt1.scatter
  * @llvm.aarch64.sve.stnt1.scatter.uxtw
  * @llvm.aarch64.sve.stnt1.scatter.scalar.offset
These intrinsics are mapped to the corresponding SVE instructions
(example for half-words, zero-extending; an IR-level sketch follows below):
  * ldnt1h { z0.s }, p0/z, [z0.s, x0]
  * stnt1h { z0.s }, p0, [z0.s, x0]
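
At the IR level, the half-word zero-extending gather above might be exercised
as follows (a minimal sketch; the type-mangled name and exact signature are
assumed to mirror the existing @llvm.aarch64.sve.ld1.gather.* intrinsics
rather than being quoted from this patch's tests):

  declare <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(
      <vscale x 4 x i1>, i16*, <vscale x 4 x i32>)

  define <vscale x 4 x i32> @gldnt1h_uxtw(<vscale x 4 x i1> %pg, i16* %base,
                                          <vscale x 4 x i32> %offsets) {
    ; Non-temporal gather of half-words; the 32-bit unscaled offsets are
    ; zero-extended to 64 bits. Expected to select to:
    ;   ldnt1h { z0.s }, p0/z, [z0.s, x0]
    %vals = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(
                <vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %offsets)
    ; Widen the loaded half-words to the 32-bit container elements.
    %res = zext <vscale x 4 x i16> %vals to <vscale x 4 x i32>
    ret <vscale x 4 x i32> %res
  }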

Note that for non-temporal gathers/scatters, the SVE spec defines only
one instruction type: "vector + scalar". For this reason, we swap the
arguments when processing intrinsics that implement the "scalar +
vector" addressing mode:
  * @llvm.aarch64.sve.ldnt1.gather
  * @llvm.aarch64.sve.ldnt1.gather.uxtw
  * @llvm.aarch64.sve.stnt1.scatter
  * @llvm.aarch64.sve.stnt1.scatter.uxtw
In other words, for a given element size, all of the gather-load intrinsics
added in this patch map to the same load instruction, and all of the
scatter-store intrinsics map to the same store instruction.
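
To illustrate the swap, both gather intrinsics sketched below are expected to
select to the same instruction, ldnt1w { z0.s }, p0/z, [z0.s, x0]; only the
first needs its base and offset operands exchanged during lowering (manglings
and signatures are, again, assumptions modelled on the existing ld1.gather
intrinsics):

  ; "scalar + vector": scalar base pointer plus a vector of 32-bit offsets.
  ; Base and Offset are swapped while lowering to match the instruction.
  declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(
      <vscale x 4 x i1>, i32*, <vscale x 4 x i32>)

  ; "vector + scalar": vector of bases plus a scalar offset in a GPR.
  ; Already in the operand order of the instruction, so no swap is needed.
  declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32(
      <vscale x 4 x i1>, <vscale x 4 x i32>, i64)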

The sve2_mem_gldnt_vs multiclass (and its counterpart for scatter
stores) from SVEInstrFormats.td was split into:
  * sve2_mem_gldnt_vs_32_ptrs (32-bit wide base addresses)
  * sve2_mem_gldnt_vs_64_ptrs (64-bit wide base addresses)
This is consistent with what we did for
@llvm.aarch64.sve.ld1.gather.scalar.offset and highlights the actual
split in the spec and the implementation.
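
The split mirrors the two pointer widths that the "vector + scalar" intrinsics
can take at the IR level, e.g. for the scatter-store form (a sketch under the
same assumptions about manglings):

  ; 32-bit wide vector of bases -> sve2_mem_sstnt_vs_32_ptrs, e.g. stnt1w { z0.s }
  declare void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32(
      <vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)

  ; 64-bit wide vector of bases -> sve2_mem_sstnt_vs_64_ptrs, e.g. stnt1d { z0.d }
  declare void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i64.nxv2i64(
      <vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)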

Reviewed by: sdesmalen

Differential Revision: https://reviews.llvm.org/D74858
banach-space committed Mar 2, 2020
1 parent 1a8cbfa commit 9249f60
Showing 11 changed files with 826 additions and 34 deletions.
32 changes: 32 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1760,6 +1760,22 @@ def int_aarch64_sve_ldff1_gather_uxtw_index : AdvSIMD_GatherLoad_SV_32b_Offsets_
def int_aarch64_sve_ldff1_gather_scalar_offset : AdvSIMD_GatherLoad_VS_Intrinsic;


//
// Non-temporal gather loads: scalar base + vector offsets
//

// 64 bit unscaled offsets
def int_aarch64_sve_ldnt1_gather : AdvSIMD_GatherLoad_SV_64b_Offsets_Intrinsic;

// 32 bit unscaled offsets, zero (zxtw) extended to 64 bits
def int_aarch64_sve_ldnt1_gather_uxtw : AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsic;

//
// Non-temporal gather loads: vector base + scalar offset
//

def int_aarch64_sve_ldnt1_gather_scalar_offset : AdvSIMD_GatherLoad_VS_Intrinsic;

//
// Scatter stores: scalar base + vector offsets
//
@@ -1791,6 +1807,22 @@ def int_aarch64_sve_st1_scatter_uxtw_index

def int_aarch64_sve_st1_scatter_scalar_offset : AdvSIMD_ScatterStore_VS_Intrinsic;

//
// Non-temporal scatter stores: scalar base + vector offsets
//

// 64 bit unscaled offsets
def int_aarch64_sve_stnt1_scatter : AdvSIMD_ScatterStore_SV_64b_Offsets_Intrinsic;

// 32 bit unscaled offsets, zero (zxtw) extended to 64 bits
def int_aarch64_sve_stnt1_scatter_uxtw : AdvSIMD_ScatterStore_SV_32b_Offsets_Intrinsic;

//
// Non-temporal scatter stores: vector base + scalar offset
//

def int_aarch64_sve_stnt1_scatter_scalar_offset : AdvSIMD_ScatterStore_VS_Intrinsic;

//
// SVE2 - Uniform DSP operations
//
39 changes: 39 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1438,13 +1438,20 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::GLDFF1S_UXTW_SCALED:
return "AArch64ISD::GLDFF1S_UXTW_SCALED";
case AArch64ISD::GLDFF1S_IMM: return "AArch64ISD::GLDFF1S_IMM";

case AArch64ISD::GLDNT1: return "AArch64ISD::GLDNT1";
case AArch64ISD::GLDNT1S: return "AArch64ISD::GLDNT1S";

case AArch64ISD::SST1: return "AArch64ISD::SST1";
case AArch64ISD::SST1_SCALED: return "AArch64ISD::SST1_SCALED";
case AArch64ISD::SST1_SXTW: return "AArch64ISD::SST1_SXTW";
case AArch64ISD::SST1_UXTW: return "AArch64ISD::SST1_UXTW";
case AArch64ISD::SST1_SXTW_SCALED: return "AArch64ISD::SST1_SXTW_SCALED";
case AArch64ISD::SST1_UXTW_SCALED: return "AArch64ISD::SST1_UXTW_SCALED";
case AArch64ISD::SST1_IMM: return "AArch64ISD::SST1_IMM";

case AArch64ISD::SSTNT1: return "AArch64ISD::SSTNT1";

case AArch64ISD::LDP: return "AArch64ISD::LDP";
case AArch64ISD::STP: return "AArch64ISD::STP";
case AArch64ISD::STNP: return "AArch64ISD::STNP";
@@ -10457,6 +10464,7 @@ static SDValue performSVEAndCombine(SDNode *N,
case AArch64ISD::GLDFF1_UXTW:
case AArch64ISD::GLDFF1_UXTW_SCALED:
case AArch64ISD::GLDFF1_IMM:
case AArch64ISD::GLDNT1:
MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
break;
default:
@@ -12644,6 +12652,14 @@ static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,
// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(5);

// In the case of non-temporal scatter stores there's only one SVE instruction
// per data-size: "vector + scalar", i.e.
// * stnt1{b|h|w|d} { z0.s }, p0, [z0.s, x0]
// Since we do have intrinsics that allow the arguments to be in a different
// order, we may need to swap them to match the spec.
if (Opcode == AArch64ISD::SSTNT1 && Offset.getValueType().isVector())
std::swap(Base, Offset);

// SST1_IMM requires that the offset is an immediate that is:
// * a multiple of #SizeInBytes,
// * in the range [0, 31 x #SizeInBytes],
@@ -12730,6 +12746,14 @@ static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(4);

// In the case of non-temporal gather loads there's only one SVE instruction
// per data-size: "vector + scalar", i.e.
// * ldnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
// Since we do have intrinsics that allow the arguments to be in a different
// order, we may need to swap them to match the spec.
if (Opcode == AArch64ISD::GLDNT1 && Offset.getValueType().isVector())
std::swap(Base, Offset);

// GLD{FF}1_IMM requires that the offset is an immediate that is:
// * a multiple of #SizeInBytes,
// * in the range [0, 31 x #SizeInBytes],
@@ -12859,6 +12883,9 @@ performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
case AArch64ISD::GLDFF1_IMM:
NewOpc = AArch64ISD::GLDFF1S_IMM;
break;
case AArch64ISD::GLDNT1:
NewOpc = AArch64ISD::GLDNT1S;
break;
default:
return SDValue();
}
@@ -12972,12 +12999,24 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performNEONPostLDSTCombine(N, DCI, DAG);
case Intrinsic::aarch64_sve_ldnt1:
return performLDNT1Combine(N, DAG);
case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1);
case Intrinsic::aarch64_sve_ldnt1_gather:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1);
case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1);
case Intrinsic::aarch64_sve_ldnf1:
return performLDNF1Combine(N, DAG, AArch64ISD::LDNF1);
case Intrinsic::aarch64_sve_ldff1:
return performLDNF1Combine(N, DAG, AArch64ISD::LDFF1);
case Intrinsic::aarch64_sve_stnt1:
return performSTNT1Combine(N, DAG);
case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1);
case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1);
case Intrinsic::aarch64_sve_stnt1_scatter:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1);
case Intrinsic::aarch64_sve_ld1_gather:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1);
case Intrinsic::aarch64_sve_ld1_gather_index:
7 changes: 7 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -261,6 +261,10 @@ enum NodeType : unsigned {
GLDFF1S_SXTW_SCALED,
GLDFF1S_IMM,

// Non-temporal gather loads
GLDNT1,
GLDNT1S,

// Scatter store
SST1,
SST1_SCALED,
@@ -270,6 +274,9 @@ enum NodeType : unsigned {
SST1_SXTW_SCALED,
SST1_IMM,

// Non-temporal scatter store
SSTNT1,

// Strict (exception-raising) floating point comparison
STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
STRICT_FCMPE,
47 changes: 26 additions & 21 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -69,6 +69,9 @@ def AArch64ldff1s_gather_uxtw_scaled : SDNode<"AArch64ISD::GLDFF1S_UXTW_SCALED",
def AArch64ldff1s_gather_sxtw_scaled : SDNode<"AArch64ISD::GLDFF1S_SXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1s_gather_imm : SDNode<"AArch64ISD::GLDFF1S_IMM", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;

def AArch64ldnt1_gather : SDNode<"AArch64ISD::GLDNT1", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;
def AArch64ldnt1s_gather : SDNode<"AArch64ISD::GLDNT1S", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>;

// Scatter stores - node definitions
//
def SDT_AArch64_SCATTER_SV : SDTypeProfile<0, 5, [
@@ -89,6 +92,8 @@ def AArch64st1_scatter_uxtw_scaled : SDNode<"AArch64ISD::SST1_UXTW_SCALED", SDT_
def AArch64st1_scatter_sxtw_scaled : SDNode<"AArch64ISD::SST1_SXTW_SCALED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
def AArch64st1_scatter_imm : SDNode<"AArch64ISD::SST1_IMM", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;

def AArch64stnt1_scatter : SDNode<"AArch64ISD::SSTNT1", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>;

// AArch64 SVE/SVE2 - the remaining node definitions
//

@@ -1909,32 +1914,32 @@ let Predicates = [HasSVE2] in {
def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;

// SVE2 non-temporal gather loads
defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>;
defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>;
defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>;
defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>;
defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>;

defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>;
defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>;
defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>;
defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>;
defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>;
defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>;
defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>;
defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00000, "ldnt1sb", AArch64ldnt1s_gather, nxv4i8>;
defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00001, "ldnt1b", AArch64ldnt1_gather, nxv4i8>;
defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00100, "ldnt1sh", AArch64ldnt1s_gather, nxv4i16>;
defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00101, "ldnt1h", AArch64ldnt1_gather, nxv4i16>;
defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b01001, "ldnt1w", AArch64ldnt1_gather, nxv4i32>;

defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10000, "ldnt1sb", AArch64ldnt1s_gather, nxv2i8>;
defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10010, "ldnt1b", AArch64ldnt1_gather, nxv2i8>;
defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10100, "ldnt1sh", AArch64ldnt1s_gather, nxv2i16>;
defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10110, "ldnt1h", AArch64ldnt1_gather, nxv2i16>;
defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11000, "ldnt1sw", AArch64ldnt1s_gather, nxv2i32>;
defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11010, "ldnt1w", AArch64ldnt1_gather, nxv2i32>;
defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11110, "ldnt1d", AArch64ldnt1_gather, nxv2i64>;

// SVE2 vector splice (constructive)
defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;

// SVE2 non-temporal scatter stores
defm STNT1B_ZZR_S : sve2_mem_sstnt_vs<0b001, "stnt1b", Z_s, ZPR32>;
defm STNT1H_ZZR_S : sve2_mem_sstnt_vs<0b011, "stnt1h", Z_s, ZPR32>;
defm STNT1W_ZZR_S : sve2_mem_sstnt_vs<0b101, "stnt1w", Z_s, ZPR32>;

defm STNT1B_ZZR_D : sve2_mem_sstnt_vs<0b000, "stnt1b", Z_d, ZPR64>;
defm STNT1H_ZZR_D : sve2_mem_sstnt_vs<0b010, "stnt1h", Z_d, ZPR64>;
defm STNT1W_ZZR_D : sve2_mem_sstnt_vs<0b100, "stnt1w", Z_d, ZPR64>;
defm STNT1D_ZZR_D : sve2_mem_sstnt_vs<0b110, "stnt1d", Z_d, ZPR64>;
defm STNT1B_ZZR_S : sve2_mem_sstnt_vs_32_ptrs<0b001, "stnt1b", AArch64stnt1_scatter, nxv4i8>;
defm STNT1H_ZZR_S : sve2_mem_sstnt_vs_32_ptrs<0b011, "stnt1h", AArch64stnt1_scatter, nxv4i16>;
defm STNT1W_ZZR_S : sve2_mem_sstnt_vs_32_ptrs<0b101, "stnt1w", AArch64stnt1_scatter, nxv4i32>;

defm STNT1B_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b000, "stnt1b", AArch64stnt1_scatter, nxv2i8>;
defm STNT1H_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b010, "stnt1h", AArch64stnt1_scatter, nxv2i16>;
defm STNT1W_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b100, "stnt1w", AArch64stnt1_scatter, nxv2i32>;
defm STNT1D_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b110, "stnt1d", AArch64stnt1_scatter, nxv2i64>;

// SVE2 table lookup (three sources)
defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl", int_aarch64_sve_tbl2>;
67 changes: 54 additions & 13 deletions llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -5071,16 +5071,36 @@ class sve2_mem_sstnt_vs_base<bits<3> opc, string asm,
let mayStore = 1;
}

multiclass sve2_mem_sstnt_vs<bits<3> opc, string asm,
RegisterOperand listty, ZPRRegOp zprty> {
def _REAL : sve2_mem_sstnt_vs_base<opc, asm, listty, zprty>;
multiclass sve2_mem_sstnt_vs_32_ptrs<bits<3> opc, string asm,
SDPatternOperator op,
ValueType vt> {
def _REAL : sve2_mem_sstnt_vs_base<opc, asm, Z_s, ZPR32>;

def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
(!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
(!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
(!cast<Instruction>(NAME # _REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 1>;

def : Pat <(op (nxv4i32 ZPR32:$Zt), (nxv4i1 PPR3bAny:$Pg), (nxv4i32 ZPR32:$Zn), (i64 GPR64:$Rm), vt),
(!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm)>;
}

multiclass sve2_mem_sstnt_vs_64_ptrs<bits<3> opc, string asm,
SDPatternOperator op,
ValueType vt> {
def _REAL : sve2_mem_sstnt_vs_base<opc, asm, Z_d, ZPR64>;

def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
(!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
(!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
(!cast<Instruction>(NAME # _REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>;

def : Pat <(op (nxv2i64 ZPR64:$Zt), (nxv2i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zn), (i64 GPR64:$Rm), vt),
(!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm)>;
}

class sve_mem_sst_sv<bits<3> opc, bit xs, bit scaled, string asm,
@@ -6529,17 +6549,38 @@ class sve2_mem_gldnt_vs_base<bits<5> opc, dag iops, string asm,
let mayLoad = 1;
}

multiclass sve2_mem_gldnt_vs<bits<5> opc, string asm,
RegisterOperand listty, ZPRRegOp zprty> {
def _REAL : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
asm, listty>;
multiclass sve2_mem_gldnt_vs_32_ptrs<bits<5> opc, string asm,
SDPatternOperator op,
ValueType vt> {
def _REAL : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm),
asm, Z_s>;

def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
(!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
(!cast<Instruction>(NAME # _REAL) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
(!cast<Instruction>(NAME # _REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, XZR), 1>;

def : Pat <(nxv4i32 (op (nxv4i1 PPR3bAny:$Pg), (nxv4i32 ZPR32:$Zd), (i64 GPR64:$Rm), vt)),
(!cast<Instruction>(NAME # _REAL) PPR3bAny:$Pg, ZPR32:$Zd, GPR64:$Rm)>;
}

multiclass sve2_mem_gldnt_vs_64_ptrs<bits<5> opc, string asm,
SDPatternOperator op,
ValueType vt> {
def _REAL : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm),
asm, Z_d>;

def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
(!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
(!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
(!cast<Instruction>(NAME # _REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>;

def : Pat <(nxv2i64 (op (nxv2i1 PPR3bAny:$Pg), (nxv2i64 ZPR64:$Zd), (i64 GPR64:$Rm), vt)),
(!cast<Instruction>(NAME # _REAL) PPR3bAny:$Pg, ZPR64:$Zd, GPR64:$Rm)>;
}

//===----------------------------------------------------------------------===//