Skip to content

Commit

Permalink
[SVE][CodeGen] Fix issues with EXTRACT_SUBVECTOR when using scalable …
Browse files Browse the repository at this point in the history
…FP vectors

In this patch I have fixed two issues:

1. Our SVE tuple get/set intrinsics were using the wrong constant type (a
hard-coded i32) for the index passed to EXTRACT_SUBVECTOR. I have fixed this
by creating the index with SelectionDAG::getVectorIdxConstant instead. I
have also updated the documentation for EXTRACT_SUBVECTOR to describe what
type the constant index must have, and we now enforce this with an assert
when creating the node.
2. The AArch64 backend was missing the appropriate patterns for
extracting certain subvectors (nxv4f16 and nxv2f32) from legal SVE types.
I have added them as part of this patch.

The only way that I could find to test the new patterns was to use the
SVE tuple get intrinsics on a single legal vector (rather than on a tuple),
although I realise this looks a bit unusual.
Tests added here:

  test/CodeGen/AArch64/sve-extract-subvector.ll

Differential Revision: https://reviews.llvm.org/D85516
  • Loading branch information
david-arm committed Aug 12, 2020
1 parent 5d54921 commit 88bbd30
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 6 deletions.
3 changes: 2 additions & 1 deletion llvm/include/llvm/CodeGen/ISDOpcodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,8 @@ enum NodeType {
/// IDX is first scaled by the runtime scaling factor of T. Elements IDX
/// through (IDX + num_elements(T) - 1) must be valid VECTOR indices. If this
/// condition cannot be determined statically but is false at runtime, then
/// the result vector is undefined.
/// the result vector is undefined. The IDX parameter must be a constant of
/// the target's vector index type (the type returned by getVectorIdxTy),
/// which for most targets is a pointer-sized integer type.
///
/// This operation supports extracting a fixed-width vector from a scalable
/// vector, but not the other way around.
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5560,6 +5560,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
(VT.getVectorMinNumElements() + N2C->getZExtValue()) <=
N1VT.getVectorMinNumElements()) &&
"Extract subvector overflow!");
assert(N2C->getAPIntValue().getBitWidth() ==
TLI->getVectorIdxTy(getDataLayout())
.getSizeInBits()
.getFixedSize() &&
"Constant index for EXTRACT_SUBVECTOR has an invalid size");

// Trivial extraction.
if (VT == N1VT)
Expand Down
10 changes: 5 additions & 5 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14235,9 +14235,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
EVT ResVT = N->getValueType(0);
uint64_t NumLanes = ResVT.getVectorElementCount().Min;
SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
SDValue Val =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1,
DAG.getConstant(IdxConst * NumLanes, DL, MVT::i32));
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
return DAG.getMergeValues({Val, Chain}, DL);
}
case Intrinsic::aarch64_sve_tuple_set: {
Expand All @@ -14263,9 +14263,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
if (I == IdxConst)
Opnds.push_back(Vec);
else {
Opnds.push_back(
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, Vec.getValueType(), Tuple,
DAG.getConstant(I * NumLanes, DL, MVT::i32)));
SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
Vec.getValueType(), Tuple, ExtIdx));
}
}
SDValue Concat =
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1155,6 +1155,16 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
(ZIP2_PPP_B PPR:$Ps, (PFALSE))>;

// Extract subvectors from FP SVE vectors.
// Each pattern selects one half of a ZPR-held FP vector (nxv8f16 -> nxv4f16,
// nxv4f32 -> nxv2f32). The constant i64 element index picks the instruction:
// index 0 maps to UUNPKLO, and an index equal to the result's minimum lane
// count (4 or 2 respectively) maps to UUNPKHI.
def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))),
(UUNPKLO_ZZ_S ZPR:$Zs)>;
def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))),
(UUNPKHI_ZZ_S ZPR:$Zs)>;
def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 0))),
(UUNPKLO_ZZ_D ZPR:$Zs)>;
def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 2))),
(UUNPKHI_ZZ_D ZPR:$Zs)>;

// Concatenate two predicates.
def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)),
(UZP1_PPP_S $p1, $p2)>;
Expand Down
38 changes: 38 additions & 0 deletions llvm/test/CodeGen/AArch64/sve-extract-subvector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,43 @@ define <vscale x 2 x i64> @extract_nxv2i64_nxv32i8(<vscale x 32 x i8> %z0_z1) {
ret <vscale x 2 x i64> %ext
}

; Requests element 0 of the SVE tuple get intrinsic on a single nxv8f16
; vector, yielding an nxv4f16 result. The expected output is one uunpklo
; from .h to .s lanes followed by ret.
define <vscale x 4 x half> @extract_lo_nxv4f16_nxv8f16(<vscale x 8 x half> %z0) {
; CHECK-LABEL: extract_lo_nxv4f16_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ret
%ext = call <vscale x 4 x half> @llvm.aarch64.sve.tuple.get.nxv8f16(<vscale x 8 x half> %z0, i32 0)
ret <vscale x 4 x half> %ext
}

; Requests element 1 of the SVE tuple get intrinsic on a single nxv8f16
; vector, yielding an nxv4f16 result. The expected output is one uunpkhi
; from .h to .s lanes followed by ret.
define <vscale x 4 x half> @extract_hi_nxv4f16_nxv8f16(<vscale x 8 x half> %z0) {
; CHECK-LABEL: extract_hi_nxv4f16_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: ret
%ext = call <vscale x 4 x half> @llvm.aarch64.sve.tuple.get.nxv8f16(<vscale x 8 x half> %z0, i32 1)
ret <vscale x 4 x half> %ext
}

; Requests element 0 of the SVE tuple get intrinsic on a single nxv4f32
; vector, yielding an nxv2f32 result. The expected output is one uunpklo
; from .s to .d lanes followed by ret.
define <vscale x 2 x float> @extract_lo_nxv2f32_nxv4f32(<vscale x 4 x float> %z0) {
; CHECK-LABEL: extract_lo_nxv2f32_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
%ext = call <vscale x 2 x float> @llvm.aarch64.sve.tuple.get.nxv4f32(<vscale x 4 x float> %z0, i32 0)
ret <vscale x 2 x float> %ext
}

; Requests element 1 of the SVE tuple get intrinsic on a single nxv4f32
; vector, yielding an nxv2f32 result. The expected output is one uunpkhi
; from .s to .d lanes followed by ret.
define <vscale x 2 x float> @extract_hi_nxv2f32_nxv4f32(<vscale x 4 x float> %z0) {
; CHECK-LABEL: extract_hi_nxv2f32_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: ret
%ext = call <vscale x 2 x float> @llvm.aarch64.sve.tuple.get.nxv4f32(<vscale x 4 x float> %z0, i32 1)
ret <vscale x 2 x float> %ext
}

declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv4i64(<vscale x 4 x i64>, i32)
declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv32i8(<vscale x 32 x i8>, i32)
declare <vscale x 2 x float> @llvm.aarch64.sve.tuple.get.nxv4f32(<vscale x 4 x float>, i32)
declare <vscale x 4 x half> @llvm.aarch64.sve.tuple.get.nxv8f16(<vscale x 8 x half>, i32)

0 comments on commit 88bbd30

Please sign in to comment.