Skip to content

Commit

Permalink
[SelectionDAG] Make WidenVecRes_Convert work for scalable vectors.
Browse files Browse the repository at this point in the history
Most of this function was not yet safe for scalable vectors, even
though the algorithm conceptually works for them. This change
makes the algorithm operate on ElementCount where appropriate,
and leaves the fixed-width-only code to use `getFixedNumElements`.

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D110058
  • Loading branch information
sdesmalen-arm committed Sep 22, 2021
1 parent 41492d7 commit 4ca1fbe
Show file tree
Hide file tree
Showing 2 changed files with 174 additions and 9 deletions.
19 changes: 10 additions & 9 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Expand Up @@ -3623,7 +3623,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDLoc DL(N);

EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
ElementCount WidenEC = WidenVT.getVectorElementCount();

EVT InVT = InOp.getValueType();

Expand All @@ -3643,14 +3643,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
}

EVT InEltVT = InVT.getVectorElementType();
EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenNumElts);
unsigned InVTNumElts = InVT.getVectorNumElements();
EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenEC);
ElementCount InVTEC = InVT.getVectorElementCount();

if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
InVTNumElts = InVT.getVectorNumElements();
if (InVTNumElts == WidenNumElts) {
InVTEC = InVT.getVectorElementCount();
if (InVTEC == WidenEC) {
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InOp);
return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
Expand All @@ -3674,9 +3674,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// it an illegal type that might lead to repeatedly splitting the input
// and then widening it. To avoid this, we widen the input only if
// it results in a legal type.
if (WidenNumElts % InVTNumElts == 0) {
if (WidenEC.isKnownMultipleOf(InVTEC.getKnownMinValue())) {
// Widen the input and call convert on the widened input vector.
unsigned NumConcat = WidenNumElts/InVTNumElts;
unsigned NumConcat =
WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue();
SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
Ops[0] = InOp;
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
Expand All @@ -3685,7 +3686,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
}

if (InVTNumElts % WidenNumElts == 0) {
if (InVTEC.isKnownMultipleOf(WidenEC.getKnownMinValue())) {
SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,
DAG.getVectorIdxConstant(0, DL));
// Extract the input and convert the shortened input vector.
Expand All @@ -3697,7 +3698,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {

// Otherwise unroll into some nasty scalar code and rebuild the vector.
EVT EltVT = WidenVT.getVectorElementType();
SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT));
// Use the original element count so we don't do more scalar ops than
// necessary.
unsigned MinElts = N->getValueType(0).getVectorNumElements();
Expand Down
164 changes: 164 additions & 0 deletions llvm/test/CodeGen/AArch64/sve-fcvt.ll
Expand Up @@ -15,6 +15,16 @@ define <vscale x 2 x float> @fcvts_nxv2f16(<vscale x 2 x half> %a) {
ret <vscale x 2 x float> %res
}

; fpext from <vscale x 3 x half> to <vscale x 3 x float>; the element count
; (3) is not a power of two. The expected output is a single fcvt from .h to
; .s elements, governed by a `ptrue p0.s` predicate.
define <vscale x 3 x float> @fcvts_nxv3f16(<vscale x 3 x half> %a) {
; CHECK-LABEL: fcvts_nxv3f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcvt z0.s, p0/m, z0.h
; CHECK-NEXT: ret
%res = fpext <vscale x 3 x half> %a to <vscale x 3 x float>
ret <vscale x 3 x float> %res
}

define <vscale x 4 x float> @fcvts_nxv4f16(<vscale x 4 x half> %a) {
; CHECK-LABEL: fcvts_nxv4f16:
; CHECK: // %bb.0:
Expand Down Expand Up @@ -59,6 +69,16 @@ define <vscale x 2 x half> @fcvth_nxv2f32(<vscale x 2 x float> %a) {
ret <vscale x 2 x half> %res
}

; fptrunc from <vscale x 3 x float> to <vscale x 3 x half>; the element count
; (3) is not a power of two. The expected output is a single fcvt from .s to
; .h elements, governed by a `ptrue p0.s` predicate.
define <vscale x 3 x half> @fcvth_nxv3f32(<vscale x 3 x float> %a) {
; CHECK-LABEL: fcvth_nxv3f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
; CHECK-NEXT: ret
%res = fptrunc <vscale x 3 x float> %a to <vscale x 3 x half>
ret <vscale x 3 x half> %res
}

define <vscale x 4 x half> @fcvth_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvth_nxv4f32:
; CHECK: // %bb.0:
Expand Down Expand Up @@ -143,6 +163,16 @@ define <vscale x 4 x i16> @fcvtzs_h_nxv4f32(<vscale x 4 x float> %a) {
ret <vscale x 4 x i16> %res
}

; fptosi from <vscale x 7 x half> to <vscale x 7 x i16>; the element count
; (7) is not a power of two. The expected output is a single fcvtzs on .h
; elements, governed by a `ptrue p0.h` predicate.
define <vscale x 7 x i16> @fcvtzs_h_nxv7f16(<vscale x 7 x half> %a) {
; CHECK-LABEL: fcvtzs_h_nxv7f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
; CHECK-NEXT: ret
%res = fptosi <vscale x 7 x half> %a to <vscale x 7 x i16>
ret <vscale x 7 x i16> %res
}

define <vscale x 8 x i16> @fcvtzs_h_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fcvtzs_h_nxv8f16:
; CHECK: // %bb.0:
Expand Down Expand Up @@ -193,6 +223,16 @@ define <vscale x 4 x i32> @fcvtzs_s_nxv4f16(<vscale x 4 x half> %a) {
ret <vscale x 4 x i32> %res
}

; fptosi from <vscale x 3 x half> to <vscale x 3 x i32>; the element count
; (3) is not a power of two. The expected output is a single fcvtzs from .h
; to .s elements, governed by a `ptrue p0.s` predicate.
define <vscale x 3 x i32> @fcvtzs_s_nxv3f16(<vscale x 3 x half> %a) {
; CHECK-LABEL: fcvtzs_s_nxv3f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
; CHECK-NEXT: ret
%res = fptosi <vscale x 3 x half> %a to <vscale x 3 x i32>
ret <vscale x 3 x i32> %res
}

define <vscale x 4 x i32> @fcvtzs_s_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtzs_s_nxv4f32:
; CHECK: // %bb.0:
Expand Down Expand Up @@ -289,6 +329,16 @@ define <vscale x 4 x i16> @fcvtzu_h_nxv4f32(<vscale x 4 x float> %a) {
ret <vscale x 4 x i16> %res
}

; fptoui from <vscale x 7 x half> to <vscale x 7 x i16>; the element count
; (7) is not a power of two. The expected output is a single fcvtzu on .h
; elements, governed by a `ptrue p0.h` predicate.
define <vscale x 7 x i16> @fcvtzu_h_nxv7f16(<vscale x 7 x half> %a) {
; CHECK-LABEL: fcvtzu_h_nxv7f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
; CHECK-NEXT: ret
%res = fptoui <vscale x 7 x half> %a to <vscale x 7 x i16>
ret <vscale x 7 x i16> %res
}

define <vscale x 8 x i16> @fcvtzu_h_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fcvtzu_h_nxv8f16:
; CHECK: // %bb.0:
Expand Down Expand Up @@ -329,6 +379,26 @@ define <vscale x 2 x i32> @fcvtzu_s_nxv2f64(<vscale x 2 x double> %a) {
ret <vscale x 2 x i32> %res
}

; fptoui from <vscale x 3 x half> to <vscale x 3 x i32>; the element count
; (3) is not a power of two. The expected output is a single fcvtzu from .h
; to .s elements, governed by a `ptrue p0.s` predicate.
define <vscale x 3 x i32> @fcvtzu_s_nxv3f16(<vscale x 3 x half> %a) {
; CHECK-LABEL: fcvtzu_s_nxv3f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
; CHECK-NEXT: ret
%res = fptoui <vscale x 3 x half> %a to <vscale x 3 x i32>
ret <vscale x 3 x i32> %res
}

; fptoui from <vscale x 3 x float> to <vscale x 3 x i32>; the element count
; (3) is not a power of two. The expected output is a single fcvtzu on .s
; elements, governed by a `ptrue p0.s` predicate.
define <vscale x 3 x i32> @fcvtzu_s_nxv3f32(<vscale x 3 x float> %a) {
; CHECK-LABEL: fcvtzu_s_nxv3f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: ret
%res = fptoui <vscale x 3 x float> %a to <vscale x 3 x i32>
ret <vscale x 3 x i32> %res
}

define <vscale x 4 x i32> @fcvtzu_s_nxv4f16(<vscale x 4 x half> %a) {
; CHECK-LABEL: fcvtzu_s_nxv4f16:
; CHECK: // %bb.0:
Expand Down Expand Up @@ -422,6 +492,27 @@ define <vscale x 2 x half> @scvtf_h_nxv2i64(<vscale x 2 x i64> %a) {
ret <vscale x 2 x half> %res
}

; sitofp from <vscale x 3 x i1> to <vscale x 3 x half>; the element count
; (3) is not a power of two. The expected output first materializes the
; predicate argument as #-1 in active .s elements (zeroing `mov`), then
; converts with a single scvtf from .s to .h under `ptrue p0.s`.
define <vscale x 3 x half> @scvtf_h_nxv3i1(<vscale x 3 x i1> %a) {
; CHECK-LABEL: scvtf_h_nxv3i1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
; CHECK-NEXT: ret
%res = sitofp <vscale x 3 x i1> %a to <vscale x 3 x half>
ret <vscale x 3 x half> %res
}

; sitofp from <vscale x 3 x i16> to <vscale x 3 x half>; the element count
; (3) is not a power of two. The expected output is a single scvtf on .h
; elements, governed by a `ptrue p0.s` predicate.
define <vscale x 3 x half> @scvtf_h_nxv3i16(<vscale x 3 x i16> %a) {
; CHECK-LABEL: scvtf_h_nxv3i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
; CHECK-NEXT: ret
%res = sitofp <vscale x 3 x i16> %a to <vscale x 3 x half>
ret <vscale x 3 x half> %res
}

define <vscale x 4 x half> @scvtf_h_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: scvtf_h_nxv4i1:
; CHECK: // %bb.0:
Expand Down Expand Up @@ -453,6 +544,27 @@ define <vscale x 4 x half> @scvtf_h_nxv4i32(<vscale x 4 x i32> %a) {
ret <vscale x 4 x half> %res
}

; sitofp from <vscale x 7 x i1> to <vscale x 7 x half>; the element count
; (7) is not a power of two. The expected output first materializes the
; predicate argument as #-1 in active .h elements (zeroing `mov`), then
; converts with a single scvtf on .h elements under `ptrue p0.h`.
define <vscale x 7 x half> @scvtf_h_nxv7i1(<vscale x 7 x i1> %a) {
; CHECK-LABEL: scvtf_h_nxv7i1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
; CHECK-NEXT: ret
%res = sitofp <vscale x 7 x i1> %a to <vscale x 7 x half>
ret <vscale x 7 x half> %res
}

; sitofp from <vscale x 7 x i16> to <vscale x 7 x half>; the element count
; (7) is not a power of two. The expected output is a single scvtf on .h
; elements, governed by a `ptrue p0.h` predicate.
define <vscale x 7 x half> @scvtf_h_nxv7i16(<vscale x 7 x i16> %a) {
; CHECK-LABEL: scvtf_h_nxv7i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
; CHECK-NEXT: ret
%res = sitofp <vscale x 7 x i16> %a to <vscale x 7 x half>
ret <vscale x 7 x half> %res
}

define <vscale x 8 x half> @scvtf_h_nxv8i1(<vscale x 8 x i1> %a) {
; CHECK-LABEL: scvtf_h_nxv8i1:
; CHECK: // %bb.0:
Expand Down Expand Up @@ -505,6 +617,27 @@ define <vscale x 2 x float> @scvtf_s_nxv2i64(<vscale x 2 x i64> %a) {
ret <vscale x 2 x float> %res
}

; sitofp from <vscale x 3 x i1> to <vscale x 3 x float>; the element count
; (3) is not a power of two. The expected output first materializes the
; predicate argument as #-1 in active .s elements (zeroing `mov`), then
; converts with a single scvtf on .s elements under `ptrue p0.s`.
define <vscale x 3 x float> @scvtf_s_nxv3i1(<vscale x 3 x i1> %a) {
; CHECK-LABEL: scvtf_s_nxv3i1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
; CHECK-NEXT: ret
%res = sitofp <vscale x 3 x i1> %a to <vscale x 3 x float>
ret <vscale x 3 x float> %res
}

; sitofp from <vscale x 3 x i32> to <vscale x 3 x float>; the element count
; (3) is not a power of two. The expected output is a single scvtf on .s
; elements, governed by a `ptrue p0.s` predicate.
define <vscale x 3 x float> @scvtf_s_nxv3i32(<vscale x 3 x i32> %a) {
; CHECK-LABEL: scvtf_s_nxv3i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
; CHECK-NEXT: ret
%res = sitofp <vscale x 3 x i32> %a to <vscale x 3 x float>
ret <vscale x 3 x float> %res
}

define <vscale x 4 x float> @scvtf_s_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: scvtf_s_nxv4i1:
; CHECK: // %bb.0:
Expand Down Expand Up @@ -600,6 +733,37 @@ define <vscale x 2 x half> @ucvtf_h_nxv2i64(<vscale x 2 x i64> %a) {
ret <vscale x 2 x half> %res
}

; uitofp from <vscale x 3 x i1> to <vscale x 3 x half>; the element count
; (3) is not a power of two. The expected output first materializes the
; predicate argument as #1 in active .s elements (zeroing `mov`), then
; converts with a single ucvtf from .s to .h under `ptrue p0.s`.
define <vscale x 3 x half> @ucvtf_h_nxv3i1(<vscale x 3 x i1> %a) {
; CHECK-LABEL: ucvtf_h_nxv3i1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
; CHECK-NEXT: ret
%res = uitofp <vscale x 3 x i1> %a to <vscale x 3 x half>
ret <vscale x 3 x half> %res
}

; uitofp from <vscale x 3 x i16> to <vscale x 3 x half>; the element count
; (3) is not a power of two. The expected output is a single ucvtf on .h
; elements, governed by a `ptrue p0.s` predicate.
define <vscale x 3 x half> @ucvtf_h_nxv3i16(<vscale x 3 x i16> %a) {
; CHECK-LABEL: ucvtf_h_nxv3i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h
; CHECK-NEXT: ret
%res = uitofp <vscale x 3 x i16> %a to <vscale x 3 x half>
ret <vscale x 3 x half> %res
}

; uitofp from <vscale x 3 x i32> to <vscale x 3 x half>; the element count
; (3) is not a power of two. The expected output is a single ucvtf from .s
; to .h elements, governed by a `ptrue p0.s` predicate.
define <vscale x 3 x half> @ucvtf_h_nxv3i32(<vscale x 3 x i32> %a) {
; CHECK-LABEL: ucvtf_h_nxv3i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
; CHECK-NEXT: ret
%res = uitofp <vscale x 3 x i32> %a to <vscale x 3 x half>
ret <vscale x 3 x half> %res
}

define <vscale x 4 x half> @ucvtf_h_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: ucvtf_h_nxv4i1:
; CHECK: // %bb.0:
Expand Down

0 comments on commit 4ca1fbe

Please sign in to comment.