Implement IR intrinsics for gather prefetch.
Summary:
Intrinsics and the corresponding codegen have been implemented for the
following SVE instructions:

1. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.S, <mod>] -> 32-bit          scaled offset
2. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D, <mod>] -> 32-bit unpacked scaled offset
3. PRF<T> <prfop>, <Pg>, [<Xn|SP>, <Zm>.D]        -> 64-bit          scaled offset
4. PRF<T> <prfop>, <Pg>, [<Zn>.S{, #<imm>}]       -> 32-bit element
5. PRF<T> <prfop>, <Pg>, [<Zn>.D{, #<imm>}]       -> 64-bit element

The instructions are associated with the following intrinsics, respectively:

1. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx4vi32(
          <vscale x 4 x i1> %Pg,
          i8* %base,
          <vscale x 4 x i32> %offset,
          i32 %prfop)

2. void @llvm.aarch64.sve.gather.prf<T>.scaled.<mod>.nx2vi32(
          <vscale x 2 x i1> %Pg,
          i8* %base,
          <vscale x 2 x i32> %offset,
          i32 %prfop)

3. void @llvm.aarch64.sve.gather.prf<T>.scaled.nx2vi64(
          <vscale x 2 x i1> %Pg,
          i8* %base,
          <vscale x 2 x i64> %offset,
          i32 %prfop)

4. void @llvm.aarch64.sve.gather.prf<T>.nx4vi32(
          <vscale x 4 x i1> %Pg,
          <vscale x 4 x i32> %bases,
          i64 %imm,
          i32 %prfop)

5. void @llvm.aarch64.sve.gather.prf<T>.nx2vi64(
          <vscale x 2 x i1> %Pg,
          <vscale x 2 x i64> %bases,
          i64 %imm,
          i32 %prfop)

The intrinsics are the IR counterparts of the following SVE ACLE functions (a short usage sketch follows the list):

* void svprf<T>(svbool_t pg, const void *base, svprfop op)
* void svprf<T>_vnum(svbool_t pg, const void *base, int64_t vnum, svprfop op)
* void svprf<T>_gather[_u32base](svbool_t pg, svuint32_t bases, svprfop op)
* void svprf<T>_gather[_u64base](svbool_t pg, svuint64_t bases, svprfop op)
* void svprf<T>_gather_[s32]offset(svbool_t pg, const void *base, svint32_t offsets, svprfop op)
* void svprf<T>_gather_[u32]offset(svbool_t pg, const void *base, svuint32_t offsets, svprfop op)
* void svprf<T>_gather_[s64]offset(svbool_t pg, const void *base, svint64_t offsets, svprfop op)
* void svprf<T>_gather_[u64]offset(svbool_t pg, const void *base, svuint64_t offsets, svprfop op)
* void svprf<T>_gather[_u32base]_offset(svbool_t pg, svuint32_t bases, int64_t offset, svprfop op)
* void svprf<T>_gather[_u64base]_offset(svbool_t pg, svuint64_t bases, int64_t offset, svprfop op)
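
For illustration, a hand-written usage sketch in LLVM IR (not taken from the
patch or its tests): the function names are hypothetical, the intrinsic name
suffixes are spelled as in the list above, and prfop 0 is assumed to encode
pldl1keep.

; Form 1: scalar base + 32-bit zero-extended offsets, corresponding to
; svprfb_gather_[u32]offset above.
define void @sketch_prfb_scalar_base(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %offsets) {
  call void @llvm.aarch64.sve.gather.prfb.scaled.uxtw.nx4vi32(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %offsets, i32 0)
  ret void
}

; Form 4: vector of 32-bit base addresses + immediate byte offset,
; corresponding to svprfb_gather[_u32base]_offset above.
define void @sketch_prfb_vector_base(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases) {
  call void @llvm.aarch64.sve.gather.prfb.nx4vi32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases, i64 16, i32 0)
  ret void
}

declare void @llvm.aarch64.sve.gather.prfb.scaled.uxtw.nx4vi32(<vscale x 4 x i1>, i8*, <vscale x 4 x i32>, i32)
declare void @llvm.aarch64.sve.gather.prfb.nx4vi32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64, i32)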

Reviewers: andwar, sdesmalen, efriedma, rengolin

Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D75580
Francesco Petrogalli committed Mar 16, 2020
1 parent 0616e99 commit 0f2b68d
Showing 8 changed files with 779 additions and 43 deletions.
49 changes: 48 additions & 1 deletion llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1263,6 +1263,27 @@ class AdvSIMD_ScatterStore_VS_Intrinsic
],
[IntrWriteMem, IntrArgMemOnly]>;


class SVE_gather_prf_scalar_base_vector_offset_scaled
: Intrinsic<[],
[
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, // Predicate
llvm_ptr_ty, // Base address
llvm_anyvector_ty, // Offsets
llvm_i32_ty // Prfop
],
[IntrInaccessibleMemOrArgMemOnly, NoCapture<1>, ImmArg<3>]>;

class SVE_gather_prf_vector_base_scalar_offset
: Intrinsic<[],
[
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, // Predicate
llvm_anyvector_ty, // Base addresses
llvm_i64_ty, // Scalar offset
llvm_i32_ty // Prfop
],
[IntrInaccessibleMemOrArgMemOnly, ImmArg<3>]>;

//
// Loads
//
@@ -1279,13 +1300,39 @@ def int_aarch64_sve_ldff1 : AdvSIMD_1Vec_PredFaultingLoad_Intrinsic;
def int_aarch64_sve_stnt1 : AdvSIMD_1Vec_PredStore_Intrinsic;

//
// Prefetch
// Prefetches
//

def int_aarch64_sve_prf
: Intrinsic<[], [llvm_anyvector_ty, llvm_ptr_ty, llvm_i32_ty],
[IntrArgMemOnly, ImmArg<2>]>;

// Scalar + 32-bit scaled offset vector, zero extend, packed and
// unpacked.
def int_aarch64_sve_gather_prfb_scaled_uxtw : SVE_gather_prf_scalar_base_vector_offset_scaled;
def int_aarch64_sve_gather_prfh_scaled_uxtw : SVE_gather_prf_scalar_base_vector_offset_scaled;
def int_aarch64_sve_gather_prfw_scaled_uxtw : SVE_gather_prf_scalar_base_vector_offset_scaled;
def int_aarch64_sve_gather_prfd_scaled_uxtw : SVE_gather_prf_scalar_base_vector_offset_scaled;

// Scalar + 32-bit scaled offset vector, sign extend, packed and
// unpacked.
def int_aarch64_sve_gather_prfb_scaled_sxtw : SVE_gather_prf_scalar_base_vector_offset_scaled;
def int_aarch64_sve_gather_prfw_scaled_sxtw : SVE_gather_prf_scalar_base_vector_offset_scaled;
def int_aarch64_sve_gather_prfh_scaled_sxtw : SVE_gather_prf_scalar_base_vector_offset_scaled;
def int_aarch64_sve_gather_prfd_scaled_sxtw : SVE_gather_prf_scalar_base_vector_offset_scaled;

// Scalar + 64-bit scaled offset vector.
def int_aarch64_sve_gather_prfb_scaled : SVE_gather_prf_scalar_base_vector_offset_scaled;
def int_aarch64_sve_gather_prfh_scaled : SVE_gather_prf_scalar_base_vector_offset_scaled;
def int_aarch64_sve_gather_prfw_scaled : SVE_gather_prf_scalar_base_vector_offset_scaled;
def int_aarch64_sve_gather_prfd_scaled : SVE_gather_prf_scalar_base_vector_offset_scaled;

// Vector + scalar.
def int_aarch64_sve_gather_prfb : SVE_gather_prf_vector_base_scalar_offset;
def int_aarch64_sve_gather_prfh : SVE_gather_prf_vector_base_scalar_offset;
def int_aarch64_sve_gather_prfw : SVE_gather_prf_vector_base_scalar_offset;
def int_aarch64_sve_gather_prfd : SVE_gather_prf_vector_base_scalar_offset;

//
// Scalar to vector operations
//
106 changes: 90 additions & 16 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12646,6 +12646,20 @@ static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset,
return DAG.getNode(ISD::SHL, DL, MVT::nxv2i64, Offset, SplatShift);
}

/// Check if the value of \p Offset represents a valid immediate for the SVE
/// gather load/prefetch and scatter store instructions with vector base and
/// immediate offset addressing mode:
///
/// [<Zn>.[S|D]{, #<imm>}]
///
/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
static bool isValidImmForSVEVecImmAddrMode(SDValue Offset,
unsigned ScalarSizeInBytes) {
ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode());
return OffsetConst && AArch64_AM::isValidImmForSVEVecImmAddrMode(
OffsetConst->getZExtValue(), ScalarSizeInBytes);
}

static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,
unsigned Opcode,
bool OnlyPackedOffsets = true) {
@@ -12697,13 +12711,9 @@ static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,
// immediates outside that range and non-immediate scalar offsets use SST1 or
// SST1_UXTW instead.
if (Opcode == AArch64ISD::SST1_IMM) {
uint64_t MaxIndex = 31;
uint64_t SrcElSize = SrcElVT.getStoreSize().getKnownMinSize();

ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode());
if (nullptr == OffsetConst ||
OffsetConst->getZExtValue() > MaxIndex * SrcElSize ||
OffsetConst->getZExtValue() % SrcElSize) {
if (!isValidImmForSVEVecImmAddrMode(Offset,
SrcVT.getScalarSizeInBits() / 8)) {
if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
Opcode = AArch64ISD::SST1_UXTW;
else
@@ -12763,7 +12773,6 @@ static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
"Gather loads are only possible for SVE vectors");

SDLoc DL(N);
MVT RetElVT = RetVT.getVectorElementType().getSimpleVT();

// Make sure that the loaded data will fit into an SVE register
if (RetVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
@@ -12780,8 +12789,8 @@ static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
// applies to non-temporal gathers because there's no instruction that takes
// indices.
if (Opcode == AArch64ISD::GLDNT1_INDEX) {
Offset =
getScaledOffsetForBitWidth(DAG, Offset, DL, RetElVT.getSizeInBits());
Offset = getScaledOffsetForBitWidth(DAG, Offset, DL,
RetVT.getScalarSizeInBits());
Opcode = AArch64ISD::GLDNT1;
}

@@ -12800,13 +12809,8 @@ static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
// immediates outside that range and non-immediate scalar offsets use GLD1 or
// GLD1_UXTW instead.
if (Opcode == AArch64ISD::GLD1_IMM || Opcode == AArch64ISD::GLDFF1_IMM) {
uint64_t MaxIndex = 31;
uint64_t RetElSize = RetElVT.getStoreSize().getKnownMinSize();

ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode());
if (nullptr == OffsetConst ||
OffsetConst->getZExtValue() > MaxIndex * RetElSize ||
OffsetConst->getZExtValue() % RetElSize) {
if (!isValidImmForSVEVecImmAddrMode(Offset,
RetVT.getScalarSizeInBits() / 8)) {
if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
Opcode = (Opcode == AArch64ISD::GLD1_IMM) ? AArch64ISD::GLD1_UXTW
: AArch64ISD::GLDFF1_UXTW;
@@ -12950,6 +12954,51 @@ performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return SDValue(N, 0);
}

/// Legalize the gather prefetch (scalar + vector addressing mode) when the
/// offset vector is an unpacked 32-bit scalable vector. The other cases (Offset
/// != nxv2i32) do not need legalization.
static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG) {
const unsigned OffsetPos = 4;
SDValue Offset = N->getOperand(OffsetPos);

// Not an unpacked vector, bail out.
if (Offset.getValueType().getSimpleVT().SimpleTy != MVT::nxv2i32)
return SDValue();

// Extend the unpacked offset vector to 64-bit lanes.
SDLoc DL(N);
Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset);
SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
// Replace the offset operand with the 64-bit one.
Ops[OffsetPos] = Offset;

return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
}

/// Combines a node carrying the intrinsic `aarch64_sve_gather_prf<T>` into a
/// node that uses `aarch64_sve_gather_prf<T>_scaled_uxtw` when the scalar
/// offset passed to `aarch64_sve_gather_prf<T>` is not a valid immediate for
/// the SVE gather prefetch instruction with vector plus immediate addressing
/// mode.
static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG,
unsigned NewIID,
unsigned ScalarSizeInBytes) {
const unsigned ImmPos = 4, OffsetPos = 3;
// No need to combine the node if the immediate is valid...
if (isValidImmForSVEVecImmAddrMode(N->getOperand(ImmPos), ScalarSizeInBytes))
return SDValue();

// ...otherwise swap the offset base with the offset...
SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
std::swap(Ops[ImmPos], Ops[OffsetPos]);
// ...and remap the intrinsic `aarch64_sve_gather_prf<T>` to
// `aarch64_sve_gather_prf<T>_scaled_uxtw`.
SDLoc DL(N);
Ops[1] = DAG.getConstant(NewIID, DL, MVT::i64);

return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
}

SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -13014,6 +13063,31 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
case Intrinsic::aarch64_sve_gather_prfb:
return combineSVEPrefetchVecBaseImmOff(
N, DAG, Intrinsic::aarch64_sve_gather_prfb_scaled_uxtw,
1 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_gather_prfh:
return combineSVEPrefetchVecBaseImmOff(
N, DAG, Intrinsic::aarch64_sve_gather_prfh_scaled_uxtw,
2 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_gather_prfw:
return combineSVEPrefetchVecBaseImmOff(
N, DAG, Intrinsic::aarch64_sve_gather_prfw_scaled_uxtw,
4 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_gather_prfd:
return combineSVEPrefetchVecBaseImmOff(
N, DAG, Intrinsic::aarch64_sve_gather_prfd_scaled_uxtw,
8 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_gather_prfb_scaled_uxtw:
case Intrinsic::aarch64_sve_gather_prfb_scaled_sxtw:
case Intrinsic::aarch64_sve_gather_prfh_scaled_uxtw:
case Intrinsic::aarch64_sve_gather_prfh_scaled_sxtw:
case Intrinsic::aarch64_sve_gather_prfw_scaled_uxtw:
case Intrinsic::aarch64_sve_gather_prfw_scaled_sxtw:
case Intrinsic::aarch64_sve_gather_prfd_scaled_uxtw:
case Intrinsic::aarch64_sve_gather_prfd_scaled_sxtw:
return legalizeSVEGatherPrefetchOffsVec(N, DAG);
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
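For illustration, a hand-written IR sketch (not part of the patch or its
tests) of the vector-plus-immediate fallback implemented by
combineSVEPrefetchVecBaseImmOff above; prfb is used so that no offset scaling
is involved, the function name is hypothetical, and prfop 0 is assumed to
encode pldl1keep.

define void @sketch_prfb_imm_fallback(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases) {
  ; 31 = 31 * sizeof(i8) is a valid immediate for [<Zn>.S{, #<imm>}], so this
  ; call can keep the vector-plus-immediate form.
  call void @llvm.aarch64.sve.gather.prfb.nx4vi32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases, i64 31, i32 0)
  ; 32 is out of range, so the combine above is expected to remap this call to
  ; @llvm.aarch64.sve.gather.prfb.scaled.uxtw.nx4vi32, with the scalar 32
  ; taking the role of the base and %bases the role of the 32-bit offsets.
  call void @llvm.aarch64.sve.gather.prfb.nx4vi32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases, i64 32, i32 0)
  ret void
}

declare void @llvm.aarch64.sve.gather.prfb.nx4vi32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64, i32)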
42 changes: 21 additions & 21 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -880,37 +880,37 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio

// Gather prefetch using scaled 32-bit offsets, e.g.
// prfh pldl1keep, p0, [x0, z0.s, uxtw #1]
defm PRFB_S : sve_mem_32b_prfm_sv_scaled<0b00, "prfb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
defm PRFH_S : sve_mem_32b_prfm_sv_scaled<0b01, "prfh", ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
defm PRFW_S : sve_mem_32b_prfm_sv_scaled<0b10, "prfw", ZPR32ExtSXTW32, ZPR32ExtUXTW32>;
defm PRFD_S : sve_mem_32b_prfm_sv_scaled<0b11, "prfd", ZPR32ExtSXTW64, ZPR32ExtUXTW64>;
defm PRFB_S : sve_mem_32b_prfm_sv_scaled<0b00, "prfb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, int_aarch64_sve_gather_prfb_scaled_sxtw, int_aarch64_sve_gather_prfb_scaled_uxtw>;
defm PRFH_S : sve_mem_32b_prfm_sv_scaled<0b01, "prfh", ZPR32ExtSXTW16, ZPR32ExtUXTW16, int_aarch64_sve_gather_prfh_scaled_sxtw, int_aarch64_sve_gather_prfh_scaled_uxtw>;
defm PRFW_S : sve_mem_32b_prfm_sv_scaled<0b10, "prfw", ZPR32ExtSXTW32, ZPR32ExtUXTW32, int_aarch64_sve_gather_prfw_scaled_sxtw, int_aarch64_sve_gather_prfw_scaled_uxtw>;
defm PRFD_S : sve_mem_32b_prfm_sv_scaled<0b11, "prfd", ZPR32ExtSXTW64, ZPR32ExtUXTW64, int_aarch64_sve_gather_prfd_scaled_sxtw, int_aarch64_sve_gather_prfd_scaled_uxtw>;

// Gather prefetch using unpacked, scaled 32-bit offsets, e.g.
// prfh pldl1keep, p0, [x0, z0.d, uxtw #1]
defm PRFB_D : sve_mem_64b_prfm_sv_ext_scaled<0b00, "prfb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
defm PRFH_D : sve_mem_64b_prfm_sv_ext_scaled<0b01, "prfh", ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
defm PRFW_D : sve_mem_64b_prfm_sv_ext_scaled<0b10, "prfw", ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
defm PRFD_D : sve_mem_64b_prfm_sv_ext_scaled<0b11, "prfd", ZPR64ExtSXTW64, ZPR64ExtUXTW64>;
defm PRFB_D : sve_mem_64b_prfm_sv_ext_scaled<0b00, "prfb", ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, int_aarch64_sve_gather_prfb_scaled_sxtw, int_aarch64_sve_gather_prfb_scaled_uxtw>;
defm PRFH_D : sve_mem_64b_prfm_sv_ext_scaled<0b01, "prfh", ZPR64ExtSXTW16, ZPR64ExtUXTW16, int_aarch64_sve_gather_prfh_scaled_sxtw, int_aarch64_sve_gather_prfh_scaled_uxtw>;
defm PRFW_D : sve_mem_64b_prfm_sv_ext_scaled<0b10, "prfw", ZPR64ExtSXTW32, ZPR64ExtUXTW32, int_aarch64_sve_gather_prfw_scaled_sxtw, int_aarch64_sve_gather_prfw_scaled_uxtw>;
defm PRFD_D : sve_mem_64b_prfm_sv_ext_scaled<0b11, "prfd", ZPR64ExtSXTW64, ZPR64ExtUXTW64, int_aarch64_sve_gather_prfd_scaled_sxtw, int_aarch64_sve_gather_prfd_scaled_uxtw>;

// Gather prefetch using scaled 64-bit offsets, e.g.
// prfh pldl1keep, p0, [x0, z0.d, lsl #1]
defm PRFB_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b00, "prfb", ZPR64ExtLSL8>;
defm PRFH_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b01, "prfh", ZPR64ExtLSL16>;
defm PRFW_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b10, "prfw", ZPR64ExtLSL32>;
defm PRFD_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b11, "prfd", ZPR64ExtLSL64>;
defm PRFB_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b00, "prfb", ZPR64ExtLSL8, int_aarch64_sve_gather_prfb_scaled>;
defm PRFH_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b01, "prfh", ZPR64ExtLSL16, int_aarch64_sve_gather_prfh_scaled>;
defm PRFW_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b10, "prfw", ZPR64ExtLSL32, int_aarch64_sve_gather_prfw_scaled>;
defm PRFD_D_SCALED : sve_mem_64b_prfm_sv_lsl_scaled<0b11, "prfd", ZPR64ExtLSL64, int_aarch64_sve_gather_prfd_scaled>;

// Gather prefetch using 32/64-bit pointers with offset, e.g.
// prfh pldl1keep, p0, [z0.s, #16]
// prfh pldl1keep, p0, [z0.d, #16]
defm PRFB_S_PZI : sve_mem_32b_prfm_vi<0b00, "prfb", imm0_31>;
defm PRFH_S_PZI : sve_mem_32b_prfm_vi<0b01, "prfh", uimm5s2>;
defm PRFW_S_PZI : sve_mem_32b_prfm_vi<0b10, "prfw", uimm5s4>;
defm PRFD_S_PZI : sve_mem_32b_prfm_vi<0b11, "prfd", uimm5s8>;

defm PRFB_D_PZI : sve_mem_64b_prfm_vi<0b00, "prfb", imm0_31>;
defm PRFH_D_PZI : sve_mem_64b_prfm_vi<0b01, "prfh", uimm5s2>;
defm PRFW_D_PZI : sve_mem_64b_prfm_vi<0b10, "prfw", uimm5s4>;
defm PRFD_D_PZI : sve_mem_64b_prfm_vi<0b11, "prfd", uimm5s8>;
defm PRFB_S_PZI : sve_mem_32b_prfm_vi<0b00, "prfb", imm0_31, int_aarch64_sve_gather_prfb>;
defm PRFH_S_PZI : sve_mem_32b_prfm_vi<0b01, "prfh", uimm5s2, int_aarch64_sve_gather_prfh>;
defm PRFW_S_PZI : sve_mem_32b_prfm_vi<0b10, "prfw", uimm5s4, int_aarch64_sve_gather_prfw>;
defm PRFD_S_PZI : sve_mem_32b_prfm_vi<0b11, "prfd", uimm5s8, int_aarch64_sve_gather_prfd>;

defm PRFB_D_PZI : sve_mem_64b_prfm_vi<0b00, "prfb", imm0_31, int_aarch64_sve_gather_prfb>;
defm PRFH_D_PZI : sve_mem_64b_prfm_vi<0b01, "prfh", uimm5s2, int_aarch64_sve_gather_prfh>;
defm PRFW_D_PZI : sve_mem_64b_prfm_vi<0b10, "prfw", uimm5s4, int_aarch64_sve_gather_prfw>;
defm PRFD_D_PZI : sve_mem_64b_prfm_vi<0b11, "prfd", uimm5s8, int_aarch64_sve_gather_prfd>;

defm ADR_SXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_sxtw<0b00, "adr">;
defm ADR_UXTW_ZZZ_D : sve_int_bin_cons_misc_0_a_uxtw<0b01, "adr">;
20 changes: 20 additions & 0 deletions llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -840,6 +840,26 @@ inline static bool isAnyMOVWMovAlias(uint64_t Value, int RegWidth) {
return isAnyMOVZMovAlias(Value, RegWidth);
}

/// Check if the value of \p OffsetInBytes can be used as an immediate for
/// the gather load/prefetch and scatter store instructions with vector base and
/// immediate offset addressing mode:
///
/// [<Zn>.[S|D]{, #<imm>}]
///
/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes,
unsigned ScalarSizeInBytes) {
// The immediate is not a multiple of the scalar size.
if (OffsetInBytes % ScalarSizeInBytes)
return false;

// The immediate is out of range.
if (OffsetInBytes / ScalarSizeInBytes > 31)
return false;

return true;
}

} // end namespace AArch64_AM

} // end namespace llvm
