Skip to content

Commit

Permalink
[AArch64] Combine vector fptoi.sat(fmul) to fixed point fcvtz
Browse files Browse the repository at this point in the history
Similar to D113199 but dealing with the vector size, this extends the
fptosi+fmul to fixed-point fold to also handle fptosi.sat nodes, which
are equally viable so long as the saturation width matches the output
width.

Differential Revision: https://reviews.llvm.org/D113200
  • Loading branch information
davemgreen committed Nov 10, 2021
1 parent 00aa0ae commit 509b397
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 43 deletions.
15 changes: 13 additions & 2 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Expand Up @@ -895,9 +895,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);

// TODO: Do the same for FP_TO_*INT_SAT.
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
setTargetDAGCombine(ISD::FDIV);

// Try and combine setcc with csel
Expand Down Expand Up @@ -13432,8 +13433,16 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) &&
"Illegal vector type after legalization");

if (N->getOpcode() == ISD::FP_TO_SINT_SAT ||
N->getOpcode() == ISD::FP_TO_UINT_SAT) {
EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
if (SatVT.getScalarSizeInBits() != IntBits)
return SDValue();
}

SDLoc DL(N);
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
bool IsSigned = (N->getOpcode() == ISD::FP_TO_SINT ||
N->getOpcode() == ISD::FP_TO_SINT_SAT);
unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
: Intrinsic::aarch64_neon_vcvtfp2fxu;
SDValue FixConv =
Expand Down Expand Up @@ -17241,6 +17250,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performIntToFpCombine(N, DAG, Subtarget);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
return performFpToIntCombine(N, DAG, DCI, Subtarget);
case ISD::FDIV:
return performFDivCombine(N, DAG, DCI, Subtarget);
Expand Down
52 changes: 11 additions & 41 deletions llvm/test/CodeGen/AArch64/fcvt_combine.ll
Expand Up @@ -300,9 +300,7 @@ declare <4 x i24> @llvm.fptoui.sat.v4i24.v4f32(<4 x float>)
define <2 x i32> @test1_sat(<2 x float> %f) {
; CHECK-LABEL: test1_sat:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov v1.2s, #16.00000000
; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
; CHECK-NEXT: fcvtzs v0.2s, v0.2s
; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #4
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
%vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %mul.i)
Expand All @@ -312,9 +310,7 @@ define <2 x i32> @test1_sat(<2 x float> %f) {
define <4 x i32> @test2_sat(<4 x float> %f) {
; CHECK-LABEL: test2_sat:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #65, lsl #24
; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: fcvtzs v0.4s, v0.4s
; CHECK-NEXT: fcvtzs v0.4s, v0.4s, #3
; CHECK-NEXT: ret
%mul.i = fmul <4 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
%vcvt.i = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %mul.i)
Expand All @@ -324,10 +320,7 @@ define <4 x i32> @test2_sat(<4 x float> %f) {
define <2 x i64> @test3_sat(<2 x double> %d) {
; CHECK-LABEL: test3_sat:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #4629700416936869888
; CHECK-NEXT: dup v1.2d, x8
; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: fcvtzs v0.2d, v0.2d, #5
; CHECK-NEXT: ret
%mul.i = fmul <2 x double> %d, <double 32.000000e+00, double 32.000000e+00>
%vcvt.i = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %mul.i)
Expand All @@ -338,14 +331,8 @@ define <2 x i64> @test3_sat(<2 x double> %d) {
define <2 x i32> @test4_sat(<2 x double> %d) {
; CHECK-LABEL: test4_sat:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov v1.2d, #16.00000000
; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
; CHECK-NEXT: mov d1, v0.d[1]
; CHECK-NEXT: fcvtzs w8, d0
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzs w8, d1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: fcvtzs v0.2d, v0.2d, #4
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: ret
%mul.i = fmul <2 x double> %d, <double 16.000000e+00, double 16.000000e+00>
%vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %mul.i)
Expand All @@ -356,13 +343,7 @@ define <2 x i32> @test4_sat(<2 x double> %d) {
define <2 x i16> @test5_sat(<2 x float> %f) {
; CHECK-LABEL: test5_sat:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov v1.2s, #16.00000000
; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
; CHECK-NEXT: movi v1.2s, #127, msl #8
; CHECK-NEXT: fcvtzs v0.2s, v0.2s
; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: mvni v1.2s, #127, msl #8
; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #4
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
%vcvt.i = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> %mul.i)
Expand All @@ -389,9 +370,7 @@ define <2 x i64> @test6_sat(<2 x float> %f) {
define <2 x i32> @test7_sat(<2 x float> %f) {
; CHECK-LABEL: test7_sat:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov v1.2s, #16.00000000
; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
; CHECK-NEXT: fcvtzu v0.2s, v0.2s
; CHECK-NEXT: fcvtzu v0.2s, v0.2s, #4
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
%vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i)
Expand Down Expand Up @@ -442,9 +421,7 @@ define <2 x i32> @test10_sat(<2 x float> %f) {
define <2 x i32> @test11_sat(<2 x float> %f) {
; CHECK-LABEL: test11_sat:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.2s, #65, lsl #24
; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
; CHECK-NEXT: fcvtzu v0.2s, v0.2s
; CHECK-NEXT: fcvtzu v0.2s, v0.2s, #3
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float undef, float 8.000000e+00>
%vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i)
Expand Down Expand Up @@ -481,10 +458,7 @@ define <2 x i32> @test13_sat(<2 x float> %f) {
define <2 x i32> @test14_sat(<2 x float> %f) {
; CHECK-LABEL: test14_sat:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1333788672
; CHECK-NEXT: dup v1.2s, w8
; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
; CHECK-NEXT: fcvtzs v0.2s, v0.2s
; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #32
; CHECK-NEXT: ret
%mul.i = fmul <2 x float> %f, <float 0x41F0000000000000, float 0x41F0000000000000>
%vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %mul.i)
Expand All @@ -494,9 +468,7 @@ define <2 x i32> @test14_sat(<2 x float> %f) {
define <3 x i32> @test_illegal_fp_to_int_sat_sat(<3 x float> %in) {
; CHECK-LABEL: test_illegal_fp_to_int_sat_sat:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov v1.4s, #4.00000000
; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: fcvtzs v0.4s, v0.4s
; CHECK-NEXT: fcvtzs v0.4s, v0.4s, #2
; CHECK-NEXT: ret
%mul.i = fmul <3 x float> %in, <float 4.0, float 4.0, float 4.0>
%vcvt.i = call <3 x i32> @llvm.fptosi.sat.v3i32.v3f32(<3 x float> %mul.i)
Expand Down Expand Up @@ -666,10 +638,8 @@ define <4 x i32> @test_v4f16_i32_sat(<4 x half> %in) {
define <4 x i32> @test_extrasat(<4 x float> %f) {
; CHECK-LABEL: test_extrasat:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #65, lsl #24
; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: movi v1.2d, #0xffffff00ffffff
; CHECK-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-NEXT: fcvtzu v0.4s, v0.4s, #3
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: bic v0.4s, #255, lsl #24
; CHECK-NEXT: ret
Expand Down

0 comments on commit 509b397

Please sign in to comment.