Skip to content

Commit

Permalink
[AArch64] match fcvtl2 with bitcasted extract
Browse files Browse the repository at this point in the history
This should eliminate a regression seen in D63815.

If we are FP extending the high half extract of a vector,
we should be able to peek through a bitcast sitting
between the extract and extend.

This replaces tablegen patterns with a more general
DAG to DAG override, so we can handle any casted type.

Differential Revision: https://reviews.llvm.org/D71515
  • Loading branch information
rotateright committed Dec 18, 2019
1 parent c7492fb commit 5e5e99c
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 130 deletions.
35 changes: 35 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
Expand Up @@ -205,6 +205,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
bool tryBitfieldInsertOp(SDNode *N);
bool tryBitfieldInsertInZeroOp(SDNode *N);
bool tryShiftAmountMod(SDNode *N);
bool tryHighFPExt(SDNode *N);

bool tryReadRegister(SDNode *N);
bool tryWriteRegister(SDNode *N);
Expand Down Expand Up @@ -1803,6 +1804,35 @@ bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
return true;
}

/// Try to form fcvtl2 instructions from a floating-point extend of a high-half
/// extract of a subvector.
///
/// The matched shape is (fp_extend (extract_subvector Src, Idx)), optionally
/// with bitcasts sitting between the extract and the extend, where Src is a
/// 128-bit vector and Idx selects its upper 64 bits.
///
/// \param N the ISD::FP_EXTEND node being selected.
/// \returns true if \p N was morphed into an FCVTL2 machine node; false if the
/// pattern did not match, in which case \p N is left untouched.
bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) {
  assert(N->getOpcode() == ISD::FP_EXTEND);

  // There are 2 forms of fcvtl2 - extend to double or extend to float.
  SDValue Extract = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT NarrowVT = Extract.getValueType();
  if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) &&
      (VT != MVT::v4f32 || NarrowVT != MVT::v4f16))
    return false;

  // Optionally look past a bitcast.
  Extract = peekThroughBitcasts(Extract);
  if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return false;

  // On big-endian targets a vector bitcast that changes the element size is
  // not a no-op on the register contents (it needs a lane reversal), so it
  // cannot simply be dropped. Only look through such a cast when the element
  // width is preserved; on little-endian every bitcast is a pure
  // reinterpretation and any casted type is fine.
  EVT ExtractVT = Extract.getValueType();
  if (CurDAG->getDataLayout().isBigEndian() &&
      ExtractVT.getScalarSizeInBits() != NarrowVT.getScalarSizeInBits())
    return false;

  // fcvtl2 reads the upper 64 bits of a 128-bit register, so the vector being
  // extracted from must itself be exactly 128 bits wide for "index == number
  // of extracted elements" to mean "start of the high half".
  SDValue Src = Extract.getOperand(0);
  if (!Src.getValueType().is128BitVector())
    return false;

  // Match extract from start of high half index.
  // Example: v8i16 -> v4i16 means the extract must begin at index 4.
  unsigned ExtractIndex = Extract.getConstantOperandVal(1);
  if (ExtractIndex != ExtractVT.getVectorNumElements())
    return false;

  // The opcode is chosen by the extended result type only; the element type
  // of Src is irrelevant because the instruction reinterprets the register.
  unsigned Opcode =
      VT == MVT::v2f64 ? AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16;
  CurDAG->SelectNodeTo(N, Opcode, VT, Src);
  return true;
}

static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
SDValue &Opd0, unsigned &Immr, unsigned &Imms,
unsigned NumberOfIgnoredLowBits = 0,
Expand Down Expand Up @@ -3010,6 +3040,11 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
return;
break;

case ISD::FP_EXTEND:
if (tryHighFPExt(Node))
return;
break;

case ISD::OR:
if (tryBitfieldInsertOp(Node))
return;
Expand Down
6 changes: 0 additions & 6 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Expand Up @@ -3646,14 +3646,8 @@ def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn)
(i64 4)))),
(FCVTLv8i16 V128:$Rn)>;
def : Pat<(v2f64 (fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
def : Pat<(v2f64 (fpextend (v2f32 (extract_subvector (v4f32 V128:$Rn),
(i64 2))))),
(FCVTLv4i32 V128:$Rn)>;

def : Pat<(v4f32 (fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>;
def : Pat<(v4f32 (fpextend (v4f16 (extract_subvector (v8f16 V128:$Rn),
(i64 4))))),
(FCVTLv8i16 V128:$Rn)>;

defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
Expand Down
169 changes: 45 additions & 124 deletions llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
Expand Up @@ -39,21 +39,14 @@ define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %x) nounwind readnone ss
}

define <2 x double> @test_vcvt_high_v1f64_f32_bitcast(<4 x float> %x) nounwind readnone ssp {
; GENERIC-LABEL: test_vcvt_high_v1f64_f32_bitcast:
; GENERIC: // %bb.0:
; GENERIC-NEXT: ext.16b v0, v0, v0, #8
; GENERIC-NEXT: fcvtl v0.2d, v0.2s
; GENERIC-NEXT: ret
;
; FAST-LABEL: test_vcvt_high_v1f64_f32_bitcast:
; FAST: // %bb.0:
; FAST-NEXT: fcvtl2 v0.2d, v0.4s
; FAST-NEXT: ret
; CHECK-LABEL: test_vcvt_high_v1f64_f32_bitcast:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl2 v0.2d, v0.4s
; CHECK-NEXT: ret
;
; GISEL-LABEL: test_vcvt_high_v1f64_f32_bitcast:
; GISEL: // %bb.0:
; GISEL-NEXT: ext.16b v0, v0, v0, #8
; GISEL-NEXT: fcvtl v0.2d, v0.2s
; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
; GISEL-NEXT: ret
%bc1 = bitcast <4 x float> %x to <2 x double>
%ext = shufflevector <2 x double> %bc1, <2 x double> undef, <1 x i32> <i32 1>
Expand All @@ -63,23 +56,14 @@ define <2 x double> @test_vcvt_high_v1f64_f32_bitcast(<4 x float> %x) nounwind r
}

define <2 x double> @test_vcvt_high_v1i64_f32_bitcast(<2 x i64> %x) nounwind readnone ssp {
; GENERIC-LABEL: test_vcvt_high_v1i64_f32_bitcast:
; GENERIC: // %bb.0:
; GENERIC-NEXT: ext.16b v0, v0, v0, #8
; GENERIC-NEXT: fcvtl v0.2d, v0.2s
; GENERIC-NEXT: ret
;
; FAST-LABEL: test_vcvt_high_v1i64_f32_bitcast:
; FAST: // %bb.0:
; FAST-NEXT: ext.16b v0, v0, v0, #8
; FAST-NEXT: // kill: def $d0 killed $d0 killed $q0
; FAST-NEXT: fcvtl v0.2d, v0.2s
; FAST-NEXT: ret
; CHECK-LABEL: test_vcvt_high_v1i64_f32_bitcast:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl2 v0.2d, v0.4s
; CHECK-NEXT: ret
;
; GISEL-LABEL: test_vcvt_high_v1i64_f32_bitcast:
; GISEL: // %bb.0:
; GISEL-NEXT: ext.16b v0, v0, v0, #8
; GISEL-NEXT: fcvtl v0.2d, v0.2s
; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
; GISEL-NEXT: ret
%ext = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> <i32 1>
%bc2 = bitcast <1 x i64> %ext to <2 x float>
Expand All @@ -88,23 +72,14 @@ define <2 x double> @test_vcvt_high_v1i64_f32_bitcast(<2 x i64> %x) nounwind rea
}

define <2 x double> @test_vcvt_high_v2i32_f32_bitcast(<4 x i32> %x) nounwind readnone ssp {
; GENERIC-LABEL: test_vcvt_high_v2i32_f32_bitcast:
; GENERIC: // %bb.0:
; GENERIC-NEXT: ext.16b v0, v0, v0, #8
; GENERIC-NEXT: fcvtl v0.2d, v0.2s
; GENERIC-NEXT: ret
;
; FAST-LABEL: test_vcvt_high_v2i32_f32_bitcast:
; FAST: // %bb.0:
; FAST-NEXT: ext.16b v0, v0, v0, #8
; FAST-NEXT: // kill: def $d0 killed $d0 killed $q0
; FAST-NEXT: fcvtl v0.2d, v0.2s
; FAST-NEXT: ret
; CHECK-LABEL: test_vcvt_high_v2i32_f32_bitcast:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl2 v0.2d, v0.4s
; CHECK-NEXT: ret
;
; GISEL-LABEL: test_vcvt_high_v2i32_f32_bitcast:
; GISEL: // %bb.0:
; GISEL-NEXT: ext.16b v0, v0, v0, #8
; GISEL-NEXT: fcvtl v0.2d, v0.2s
; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
; GISEL-NEXT: ret
%ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%bc2 = bitcast <2 x i32> %ext to <2 x float>
Expand All @@ -113,23 +88,14 @@ define <2 x double> @test_vcvt_high_v2i32_f32_bitcast(<4 x i32> %x) nounwind rea
}

define <2 x double> @test_vcvt_high_v4i16_f32_bitcast(<8 x i16> %x) nounwind readnone ssp {
; GENERIC-LABEL: test_vcvt_high_v4i16_f32_bitcast:
; GENERIC: // %bb.0:
; GENERIC-NEXT: ext.16b v0, v0, v0, #8
; GENERIC-NEXT: fcvtl v0.2d, v0.2s
; GENERIC-NEXT: ret
;
; FAST-LABEL: test_vcvt_high_v4i16_f32_bitcast:
; FAST: // %bb.0:
; FAST-NEXT: ext.16b v0, v0, v0, #8
; FAST-NEXT: // kill: def $d0 killed $d0 killed $q0
; FAST-NEXT: fcvtl v0.2d, v0.2s
; FAST-NEXT: ret
; CHECK-LABEL: test_vcvt_high_v4i16_f32_bitcast:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl2 v0.2d, v0.4s
; CHECK-NEXT: ret
;
; GISEL-LABEL: test_vcvt_high_v4i16_f32_bitcast:
; GISEL: // %bb.0:
; GISEL-NEXT: ext.16b v0, v0, v0, #8
; GISEL-NEXT: fcvtl v0.2d, v0.2s
; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
; GISEL-NEXT: ret
%ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%bc2 = bitcast <4 x i16> %ext to <2 x float>
Expand All @@ -138,23 +104,14 @@ define <2 x double> @test_vcvt_high_v4i16_f32_bitcast(<8 x i16> %x) nounwind rea
}

define <2 x double> @test_vcvt_high_v8i8_f32_bitcast(<16 x i8> %x) nounwind readnone ssp {
; GENERIC-LABEL: test_vcvt_high_v8i8_f32_bitcast:
; GENERIC: // %bb.0:
; GENERIC-NEXT: ext.16b v0, v0, v0, #8
; GENERIC-NEXT: fcvtl v0.2d, v0.2s
; GENERIC-NEXT: ret
;
; FAST-LABEL: test_vcvt_high_v8i8_f32_bitcast:
; FAST: // %bb.0:
; FAST-NEXT: ext.16b v0, v0, v0, #8
; FAST-NEXT: // kill: def $d0 killed $d0 killed $q0
; FAST-NEXT: fcvtl v0.2d, v0.2s
; FAST-NEXT: ret
; CHECK-LABEL: test_vcvt_high_v8i8_f32_bitcast:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl2 v0.2d, v0.4s
; CHECK-NEXT: ret
;
; GISEL-LABEL: test_vcvt_high_v8i8_f32_bitcast:
; GISEL: // %bb.0:
; GISEL-NEXT: ext.16b v0, v0, v0, #8
; GISEL-NEXT: fcvtl v0.2d, v0.2s
; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
; GISEL-NEXT: ret
%ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%bc2 = bitcast <8 x i8> %ext to <2 x float>
Expand All @@ -163,23 +120,14 @@ define <2 x double> @test_vcvt_high_v8i8_f32_bitcast(<16 x i8> %x) nounwind read
}

define <4 x float> @test_vcvt_high_v1i64_f16_bitcast(<2 x i64> %x) nounwind readnone ssp {
; GENERIC-LABEL: test_vcvt_high_v1i64_f16_bitcast:
; GENERIC: // %bb.0:
; GENERIC-NEXT: ext.16b v0, v0, v0, #8
; GENERIC-NEXT: fcvtl v0.4s, v0.4h
; GENERIC-NEXT: ret
;
; FAST-LABEL: test_vcvt_high_v1i64_f16_bitcast:
; FAST: // %bb.0:
; FAST-NEXT: ext.16b v0, v0, v0, #8
; FAST-NEXT: // kill: def $d0 killed $d0 killed $q0
; FAST-NEXT: fcvtl v0.4s, v0.4h
; FAST-NEXT: ret
; CHECK-LABEL: test_vcvt_high_v1i64_f16_bitcast:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-NEXT: ret
;
; GISEL-LABEL: test_vcvt_high_v1i64_f16_bitcast:
; GISEL: // %bb.0:
; GISEL-NEXT: ext.16b v0, v0, v0, #8
; GISEL-NEXT: fcvtl v0.4s, v0.4h
; GISEL-NEXT: fcvtl2 v0.4s, v0.8h
; GISEL-NEXT: ret
%ext = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> <i32 1>
%bc2 = bitcast <1 x i64> %ext to <4 x half>
Expand All @@ -188,23 +136,14 @@ define <4 x float> @test_vcvt_high_v1i64_f16_bitcast(<2 x i64> %x) nounwind read
}

define <4 x float> @test_vcvt_high_v2i32_f16_bitcast(<4 x i32> %x) nounwind readnone ssp {
; GENERIC-LABEL: test_vcvt_high_v2i32_f16_bitcast:
; GENERIC: // %bb.0:
; GENERIC-NEXT: ext.16b v0, v0, v0, #8
; GENERIC-NEXT: fcvtl v0.4s, v0.4h
; GENERIC-NEXT: ret
;
; FAST-LABEL: test_vcvt_high_v2i32_f16_bitcast:
; FAST: // %bb.0:
; FAST-NEXT: ext.16b v0, v0, v0, #8
; FAST-NEXT: // kill: def $d0 killed $d0 killed $q0
; FAST-NEXT: fcvtl v0.4s, v0.4h
; FAST-NEXT: ret
; CHECK-LABEL: test_vcvt_high_v2i32_f16_bitcast:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-NEXT: ret
;
; GISEL-LABEL: test_vcvt_high_v2i32_f16_bitcast:
; GISEL: // %bb.0:
; GISEL-NEXT: ext.16b v0, v0, v0, #8
; GISEL-NEXT: fcvtl v0.4s, v0.4h
; GISEL-NEXT: fcvtl2 v0.4s, v0.8h
; GISEL-NEXT: ret
%ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%bc2 = bitcast <2 x i32> %ext to <4 x half>
Expand All @@ -213,23 +152,14 @@ define <4 x float> @test_vcvt_high_v2i32_f16_bitcast(<4 x i32> %x) nounwind read
}

define <4 x float> @test_vcvt_high_v4i16_f16_bitcast(<8 x i16> %x) nounwind readnone ssp {
; GENERIC-LABEL: test_vcvt_high_v4i16_f16_bitcast:
; GENERIC: // %bb.0:
; GENERIC-NEXT: ext.16b v0, v0, v0, #8
; GENERIC-NEXT: fcvtl v0.4s, v0.4h
; GENERIC-NEXT: ret
;
; FAST-LABEL: test_vcvt_high_v4i16_f16_bitcast:
; FAST: // %bb.0:
; FAST-NEXT: ext.16b v0, v0, v0, #8
; FAST-NEXT: // kill: def $d0 killed $d0 killed $q0
; FAST-NEXT: fcvtl v0.4s, v0.4h
; FAST-NEXT: ret
; CHECK-LABEL: test_vcvt_high_v4i16_f16_bitcast:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-NEXT: ret
;
; GISEL-LABEL: test_vcvt_high_v4i16_f16_bitcast:
; GISEL: // %bb.0:
; GISEL-NEXT: ext.16b v0, v0, v0, #8
; GISEL-NEXT: fcvtl v0.4s, v0.4h
; GISEL-NEXT: fcvtl2 v0.4s, v0.8h
; GISEL-NEXT: ret
%ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%bc2 = bitcast <4 x i16> %ext to <4 x half>
Expand All @@ -238,23 +168,14 @@ define <4 x float> @test_vcvt_high_v4i16_f16_bitcast(<8 x i16> %x) nounwind read
}

define <4 x float> @test_vcvt_high_v8i8_f16_bitcast(<16 x i8> %x) nounwind readnone ssp {
; GENERIC-LABEL: test_vcvt_high_v8i8_f16_bitcast:
; GENERIC: // %bb.0:
; GENERIC-NEXT: ext.16b v0, v0, v0, #8
; GENERIC-NEXT: fcvtl v0.4s, v0.4h
; GENERIC-NEXT: ret
;
; FAST-LABEL: test_vcvt_high_v8i8_f16_bitcast:
; FAST: // %bb.0:
; FAST-NEXT: ext.16b v0, v0, v0, #8
; FAST-NEXT: // kill: def $d0 killed $d0 killed $q0
; FAST-NEXT: fcvtl v0.4s, v0.4h
; FAST-NEXT: ret
; CHECK-LABEL: test_vcvt_high_v8i8_f16_bitcast:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-NEXT: ret
;
; GISEL-LABEL: test_vcvt_high_v8i8_f16_bitcast:
; GISEL: // %bb.0:
; GISEL-NEXT: ext.16b v0, v0, v0, #8
; GISEL-NEXT: fcvtl v0.4s, v0.4h
; GISEL-NEXT: fcvtl2 v0.4s, v0.8h
; GISEL-NEXT: ret
%ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%bc2 = bitcast <8 x i8> %ext to <4 x half>
Expand Down

0 comments on commit 5e5e99c

Please sign in to comment.