Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AArch64] Avoid using NEON FCVTXN in Streaming-SVE mode. #91981

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 23 additions & 5 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19507,7 +19507,27 @@ static SDValue performBuildVectorCombine(SDNode *N,
SDLoc DL(N);
EVT VT = N->getValueType(0);

if (VT == MVT::v4f16 || VT == MVT::v4bf16) {
const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
bool CanUseFCVTXN = Subtarget.isNeonAvailable() ||
(Subtarget.useSVEForFixedLengthVectors() &&
(Subtarget.hasSVE2() || Subtarget.hasSME()));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't sufficient to say we're in streaming mode is it? Don't we need something like || (Subtarget.hasSME() && Subtarget.isStreaming())

if (CanUseFCVTXN && (VT == MVT::v4f16 || VT == MVT::v4bf16)) {
// Convenience function to build an FCVT instruction, which is needed
// once for the bottom bits and once for the top bits.
auto MakeFCVTXN = [&](SDValue V) {
if (Subtarget.isNeonAvailable())
return DAG.getNode(AArch64ISD::FCVTXN, DL, MVT::v2f32, V);
else {
SDValue In = convertToScalableVector(DAG, MVT::nxv2f64, V);
SDValue PTrue = getPredicateForVector(DAG, DL, MVT::v2f64);
SDValue ID = DAG.getTargetConstant(Intrinsic::aarch64_sve_fcvtx_f32f64,
DL, MVT::i64);
SDValue Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::nxv4f32,
{ID, In, PTrue, In});
return convertFromScalableVector(DAG, MVT::v2f32, Op);
}
};

SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1),
Elt2 = N->getOperand(2), Elt3 = N->getOperand(3);
if (Elt0->getOpcode() == ISD::FP_ROUND &&
Expand Down Expand Up @@ -19548,12 +19568,10 @@ static SDValue performBuildVectorCombine(SDNode *N,
Elt2->getOperand(0)->getConstantOperandVal(1) == 0 &&
Elt3->getOperand(0)->getConstantOperandVal(1) == 1) {
SDValue HighLanesSrcVec = Elt2->getOperand(0)->getOperand(0);
HighLanes =
DAG.getNode(AArch64ISD::FCVTXN, DL, MVT::v2f32, HighLanesSrcVec);
HighLanes = MakeFCVTXN(HighLanesSrcVec);
}
if (HighLanes) {
SDValue DoubleToSingleSticky =
DAG.getNode(AArch64ISD::FCVTXN, DL, MVT::v2f32, LowLanesSrcVec);
SDValue DoubleToSingleSticky = MakeFCVTXN(LowLanesSrcVec);
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
DoubleToSingleSticky, HighLanes);
return DAG.getNode(ISD::FP_ROUND, DL, VT, Concat,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -427,28 +427,36 @@ define void @test_copysign_v4f16_v4f32(ptr %ap, ptr %bp) {
define void @test_copysign_v4f16_v4f64(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v4f16_v4f64:
; SVE: // %bb.0:
; SVE-NEXT: ldp q0, q1, [x1]
; SVE-NEXT: ptrue p0.s, vl2
; SVE-NEXT: fcvtxn v1.2s, v1.2d
; SVE-NEXT: fcvtxn v0.2s, v0.2d
; SVE-NEXT: splice z0.s, p0, z0.s, z1.s
; SVE-NEXT: ptrue p0.s
; SVE-NEXT: ldr d1, [x0]
; SVE-NEXT: and z1.h, z1.h, #0x7fff
; SVE-NEXT: fcvt z0.h, p0/m, z0.s
; SVE-NEXT: uzp1 z0.h, z0.h, z0.h
; SVE-NEXT: sub sp, sp, #16
; SVE-NEXT: .cfi_def_cfa_offset 16
; SVE-NEXT: ldp q1, q0, [x1]
; SVE-NEXT: ldr d4, [x0]
; SVE-NEXT: and z4.h, z4.h, #0x7fff
; SVE-NEXT: mov z2.d, z0.d[1]
; SVE-NEXT: mov z3.d, z1.d[1]
; SVE-NEXT: fcvt h0, d0
; SVE-NEXT: fcvt h1, d1
; SVE-NEXT: fcvt h2, d2
; SVE-NEXT: fcvt h3, d3
; SVE-NEXT: str h0, [sp, #12]
; SVE-NEXT: str h1, [sp, #8]
; SVE-NEXT: str h2, [sp, #14]
; SVE-NEXT: str h3, [sp, #10]
; SVE-NEXT: ldr d0, [sp, #8]
; SVE-NEXT: and z0.h, z0.h, #0x8000
; SVE-NEXT: orr z0.d, z1.d, z0.d
; SVE-NEXT: orr z0.d, z4.d, z0.d
; SVE-NEXT: str d0, [x0]
; SVE-NEXT: add sp, sp, #16
; SVE-NEXT: ret
;
; SVE2-LABEL: test_copysign_v4f16_v4f64:
; SVE2: // %bb.0:
; SVE2-NEXT: ldp q0, q1, [x1]
; SVE2-NEXT: ptrue p0.s, vl2
; SVE2-NEXT: ptrue p0.d, vl2
; SVE2-NEXT: ldr d2, [x0]
; SVE2-NEXT: fcvtxn v1.2s, v1.2d
; SVE2-NEXT: fcvtxn v0.2s, v0.2d
; SVE2-NEXT: fcvtx z1.s, p0/m, z1.d
; SVE2-NEXT: fcvtx z0.s, p0/m, z0.d
; SVE2-NEXT: ptrue p0.s, vl2
; SVE2-NEXT: splice z0.s, p0, z0.s, z1.s
; SVE2-NEXT: ptrue p0.s
; SVE2-NEXT: mov z1.h, #32767 // =0x7fff
Expand Down
Loading