diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 07011ab02070b5..11dd3a6f9e6dc8 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -895,9 +895,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::UINT_TO_FP); - // TODO: Do the same for FP_TO_*INT_SAT. setTargetDAGCombine(ISD::FP_TO_SINT); setTargetDAGCombine(ISD::FP_TO_UINT); + setTargetDAGCombine(ISD::FP_TO_SINT_SAT); + setTargetDAGCombine(ISD::FP_TO_UINT_SAT); setTargetDAGCombine(ISD::FDIV); // Try and combine setcc with csel @@ -13432,8 +13433,16 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) && "Illegal vector type after legalization"); + if (N->getOpcode() == ISD::FP_TO_SINT_SAT || + N->getOpcode() == ISD::FP_TO_UINT_SAT) { + EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + if (SatVT.getScalarSizeInBits() != IntBits) + return SDValue(); + } + SDLoc DL(N); - bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; + bool IsSigned = (N->getOpcode() == ISD::FP_TO_SINT || + N->getOpcode() == ISD::FP_TO_SINT_SAT); unsigned IntrinsicOpcode = IsSigned ? 
Intrinsic::aarch64_neon_vcvtfp2fxs : Intrinsic::aarch64_neon_vcvtfp2fxu; SDValue FixConv = @@ -17241,6 +17250,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performIntToFpCombine(N, DAG, Subtarget); case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: return performFpToIntCombine(N, DAG, DCI, Subtarget); case ISD::FDIV: return performFDivCombine(N, DAG, DCI, Subtarget); diff --git a/llvm/test/CodeGen/AArch64/fcvt_combine.ll b/llvm/test/CodeGen/AArch64/fcvt_combine.ll index f65ea72cb842a2..16c72465f3e2d4 100644 --- a/llvm/test/CodeGen/AArch64/fcvt_combine.ll +++ b/llvm/test/CodeGen/AArch64/fcvt_combine.ll @@ -300,9 +300,7 @@ declare <4 x i24> @llvm.fptoui.sat.v4i24.v4f32(<4 x float>) define <2 x i32> @test1_sat(<2 x float> %f) { ; CHECK-LABEL: test1_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov v1.2s, #16.00000000 -; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #4 ; CHECK-NEXT: ret %mul.i = fmul <2 x float> %f, %vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %mul.i) @@ -312,9 +310,7 @@ define <2 x i32> @test1_sat(<2 x float> %f) { define <4 x i32> @test2_sat(<4 x float> %f) { ; CHECK-LABEL: test2_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #65, lsl #24 -; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s -; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s, #3 ; CHECK-NEXT: ret %mul.i = fmul <4 x float> %f, %vcvt.i = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %mul.i) @@ -324,10 +320,7 @@ define <4 x i32> @test2_sat(<4 x float> %f) { define <2 x i64> @test3_sat(<2 x double> %d) { ; CHECK-LABEL: test3_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #4629700416936869888 -; CHECK-NEXT: dup v1.2d, x8 -; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d, #5 ; CHECK-NEXT: ret %mul.i = fmul <2 x double> %d, %vcvt.i = call <2 x i64> 
@llvm.fptosi.sat.v2i64.v2f64(<2 x double> %mul.i) @@ -338,14 +331,8 @@ define <2 x i64> @test3_sat(<2 x double> %d) { define <2 x i32> @test4_sat(<2 x double> %d) { ; CHECK-LABEL: test4_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov v1.2d, #16.00000000 -; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fcvtzs w8, d0 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: fcvtzs v0.2d, v0.2d, #4 +; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: ret %mul.i = fmul <2 x double> %d, %vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %mul.i) @@ -356,13 +343,7 @@ define <2 x i32> @test4_sat(<2 x double> %d) { define <2 x i16> @test5_sat(<2 x float> %f) { ; CHECK-LABEL: test5_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov v1.2s, #16.00000000 -; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-NEXT: movi v1.2s, #127, msl #8 -; CHECK-NEXT: fcvtzs v0.2s, v0.2s -; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mvni v1.2s, #127, msl #8 -; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s +; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #4 ; CHECK-NEXT: ret %mul.i = fmul <2 x float> %f, %vcvt.i = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f32(<2 x float> %mul.i) @@ -389,9 +370,7 @@ define <2 x i64> @test6_sat(<2 x float> %f) { define <2 x i32> @test7_sat(<2 x float> %f) { ; CHECK-LABEL: test7_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov v1.2s, #16.00000000 -; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: fcvtzu v0.2s, v0.2s, #4 ; CHECK-NEXT: ret %mul.i = fmul <2 x float> %f, %vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i) @@ -442,9 +421,7 @@ define <2 x i32> @test10_sat(<2 x float> %f) { define <2 x i32> @test11_sat(<2 x float> %f) { ; CHECK-LABEL: test11_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #65, lsl #24 -; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; 
CHECK-NEXT: fcvtzu v0.2s, v0.2s, #3 ; CHECK-NEXT: ret %mul.i = fmul <2 x float> %f, %vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i) @@ -481,10 +458,7 @@ define <2 x i32> @test13_sat(<2 x float> %f) { define <2 x i32> @test14_sat(<2 x float> %f) { ; CHECK-LABEL: test14_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1333788672 -; CHECK-NEXT: dup v1.2s, w8 -; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: fcvtzs v0.2s, v0.2s, #32 ; CHECK-NEXT: ret %mul.i = fmul <2 x float> %f, %vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %mul.i) @@ -494,9 +468,7 @@ define <2 x i32> @test14_sat(<2 x float> %f) { define <3 x i32> @test_illegal_fp_to_int_sat_sat(<3 x float> %in) { ; CHECK-LABEL: test_illegal_fp_to_int_sat_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov v1.4s, #4.00000000 -; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s -; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s, #2 ; CHECK-NEXT: ret %mul.i = fmul <3 x float> %in, %vcvt.i = call <3 x i32> @llvm.fptosi.sat.v3i32.v3f32(<3 x float> %mul.i) @@ -666,10 +638,8 @@ define <4 x i32> @test_v4f16_i32_sat(<4 x half> %in) { define <4 x i32> @test_extrasat(<4 x float> %f) { ; CHECK-LABEL: test_extrasat: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #65, lsl #24 -; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: movi v1.2d, #0xffffff00ffffff -; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s, #3 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bic v0.4s, #255, lsl #24 ; CHECK-NEXT: ret