diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 93df88b3f6d7f..0b1c25764549d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6822,6 +6822,14 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
   SDValue Op = N->getOperand(0);
 
+  // abs(x) -> smax(x,sub(0,x))
+  if (isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::SMAX, VT)) {
+    SDValue Zero = DAG.getConstant(0, dl, VT);
+    Result = DAG.getNode(ISD::SMAX, dl, VT, Op,
+                         DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
+    return true;
+  }
+
   // Only expand vector types if we have the appropriate vector operations.
   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
                         !isOperationLegalOrCustom(ISD::ADD, VT) ||
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d19fbd477d77e..1f3ef3efa2c27 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -806,9 +806,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
     }
 
-    for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
-      setOperationAction(ISD::ABS, VT, Custom);
-
     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
     // with merges, splats, etc.
     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
@@ -841,11 +838,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
 
-    // Without hasP8Altivec set, v2i64 SMAX isn't available.
-    // But ABS custom lowering requires SMAX support.
-    if (!Subtarget.hasP8Altivec())
-      setOperationAction(ISD::ABS, MVT::v2i64, Expand);
-
     // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
     setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
     // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
@@ -10886,44 +10878,6 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
   }
 }
 
-SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
-
-  assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
-
-  EVT VT = Op.getValueType();
-  assert(VT.isVector() &&
-         "Only set vector abs as custom, scalar abs shouldn't reach here!");
-  assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
-          VT == MVT::v16i8) &&
-         "Unexpected vector element type!");
-  assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
-         "Current subtarget doesn't support smax v2i64!");
-
-  // For vector abs, it can be lowered to:
-  // abs x
-  // ==>
-  // y = -x
-  // smax(x, y)
-
-  SDLoc dl(Op);
-  SDValue X = Op.getOperand(0);
-  SDValue Zero = DAG.getConstant(0, dl, VT);
-  SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
-
-  // SMAX patch https://reviews.llvm.org/D47332
-  // hasn't landed yet, so use intrinsic first here.
-  // TODO: Should use SMAX directly once SMAX patch landed
-  Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
-  if (VT == MVT::v2i64)
-    BifID = Intrinsic::ppc_altivec_vmaxsd;
-  else if (VT == MVT::v8i16)
-    BifID = Intrinsic::ppc_altivec_vmaxsh;
-  else if (VT == MVT::v16i8)
-    BifID = Intrinsic::ppc_altivec_vmaxsb;
-
-  return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
-}
-
 // Custom lowering for fpext vf32 to v2f64
 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op,
                                           SelectionDAG &DAG) const {
@@ -11059,7 +11013,6 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
   case ISD::MUL:                return LowerMUL(Op, DAG);
-  case ISD::ABS:                return LowerABS(Op, DAG);
   case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
   case ISD::ROTL:               return LowerROTL(Op, DAG);
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 6c4899fae22cb..8fad97c618b7a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1157,7 +1157,6 @@ namespace llvm {
     SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
 
diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll
index 8e20b001cc3e8..942f09f9341b8 100644
--- a/llvm/test/CodeGen/X86/abs.ll
+++ b/llvm/test/CodeGen/X86/abs.ll
@@ -397,10 +397,9 @@ define <8 x i32> @test_v8i32(<8 x i32> %a) nounwind {
 define <8 x i16> @test_v8i16(<8 x i16> %a) nounwind {
 ; SSE-LABEL: test_v8i16:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa %xmm0, %xmm1
-; SSE-NEXT:    psraw $15, %xmm1
-; SSE-NEXT:    paddw %xmm1, %xmm0
-; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm1
+; SSE-NEXT:    psubw %xmm0, %xmm1
+; SSE-NEXT:    pmaxsw %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v8i16:
diff --git a/llvm/test/CodeGen/X86/combine-abs.ll b/llvm/test/CodeGen/X86/combine-abs.ll
index af399e3037049..da59da48a919b 100644
--- a/llvm/test/CodeGen/X86/combine-abs.ll
+++ b/llvm/test/CodeGen/X86/combine-abs.ll
@@ -55,10 +55,9 @@ define i32 @combine_i32_abs_abs(i32 %a) {
 define <8 x i16> @combine_v8i16_abs_abs(<8 x i16> %a) {
 ; SSE2-LABEL: combine_v8i16_abs_abs:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psraw $15, %xmm1
-; SSE2-NEXT:    paddw %xmm1, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    psubw %xmm0, %xmm1
+; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: combine_v8i16_abs_abs:
diff --git a/llvm/test/CodeGen/X86/viabs.ll b/llvm/test/CodeGen/X86/viabs.ll
index 588246d7ded61..873dea24ecdd1 100644
--- a/llvm/test/CodeGen/X86/viabs.ll
+++ b/llvm/test/CodeGen/X86/viabs.ll
@@ -88,10 +88,9 @@ define <4 x i32> @test_abs_ge_v4i32(<4 x i32> %a) nounwind {
 define <8 x i16> @test_abs_gt_v8i16(<8 x i16> %a) nounwind {
 ; SSE2-LABEL: test_abs_gt_v8i16:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psraw $15, %xmm1
-; SSE2-NEXT:    paddw %xmm1, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    psubw %xmm0, %xmm1
+; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test_abs_gt_v8i16:
@@ -311,14 +310,12 @@ define <8 x i32> @test_abs_ge_v8i32(<8 x i32> %a) nounwind {
 define <16 x i16> @test_abs_gt_v16i16(<16 x i16> %a) nounwind {
 ; SSE2-LABEL: test_abs_gt_v16i16:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm0, %xmm2
-; SSE2-NEXT:    psraw $15, %xmm2
-; SSE2-NEXT:    paddw %xmm2, %xmm0
-; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    psraw $15, %xmm2
-; SSE2-NEXT:    paddw %xmm2, %xmm1
-; SSE2-NEXT:    pxor %xmm2, %xmm1
+; SSE2-NEXT:    pxor %xmm2, %xmm2
+; SSE2-NEXT:    pxor %xmm3, %xmm3
+; SSE2-NEXT:    psubw %xmm0, %xmm3
+; SSE2-NEXT:    pmaxsw %xmm3, %xmm0
+; SSE2-NEXT:    psubw %xmm1, %xmm2
+; SSE2-NEXT:    pmaxsw %xmm2, %xmm1
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test_abs_gt_v16i16:
@@ -948,22 +945,18 @@ define <64 x i8> @test_abs_lt_v64i8(<64 x i8> %a) nounwind {
 define <32 x i16> @test_abs_gt_v32i16(<32 x i16> %a) nounwind {
 ; SSE2-LABEL: test_abs_gt_v32i16:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm0, %xmm4
-; SSE2-NEXT:    psraw $15, %xmm4
-; SSE2-NEXT:    paddw %xmm4, %xmm0
-; SSE2-NEXT:    pxor %xmm4, %xmm0
-; SSE2-NEXT:    movdqa %xmm1, %xmm4
-; SSE2-NEXT:    psraw $15, %xmm4
-; SSE2-NEXT:    paddw %xmm4, %xmm1
-; SSE2-NEXT:    pxor %xmm4, %xmm1
-; SSE2-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-NEXT:    psraw $15, %xmm4
-; SSE2-NEXT:    paddw %xmm4, %xmm2
-; SSE2-NEXT:    pxor %xmm4, %xmm2
-; SSE2-NEXT:    movdqa %xmm3, %xmm4
-; SSE2-NEXT:    psraw $15, %xmm4
-; SSE2-NEXT:    paddw %xmm4, %xmm3
-; SSE2-NEXT:    pxor %xmm4, %xmm3
+; SSE2-NEXT:    pxor %xmm4, %xmm4
+; SSE2-NEXT:    pxor %xmm5, %xmm5
+; SSE2-NEXT:    psubw %xmm0, %xmm5
+; SSE2-NEXT:    pmaxsw %xmm5, %xmm0
+; SSE2-NEXT:    pxor %xmm5, %xmm5
+; SSE2-NEXT:    psubw %xmm1, %xmm5
+; SSE2-NEXT:    pmaxsw %xmm5, %xmm1
+; SSE2-NEXT:    pxor %xmm5, %xmm5
+; SSE2-NEXT:    psubw %xmm2, %xmm5
+; SSE2-NEXT:    pmaxsw %xmm5, %xmm2
+; SSE2-NEXT:    psubw %xmm3, %xmm4
+; SSE2-NEXT:    pmaxsw %xmm4, %xmm3
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test_abs_gt_v32i16:
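
Note (illustrative, not part of the patch): the new TargetLowering::expandABS fallback above rewrites abs(x) as smax(x, sub(0, x)) whenever both ISD::SUB and ISD::SMAX are legal for the type. That is what lets the PowerPC-specific LowerABS be deleted and what switches the SSE2 v8i16 checks from the sra/add/xor sequence to psubw + pmaxsw. Below is a minimal scalar C++ sketch of the same identity on a single 16-bit lane; the helper name abs_via_smax is hypothetical, not an LLVM API.

// Sketch of abs(x) -> smax(x, sub(0, x)), modelled on one 16-bit lane.
#include <algorithm>
#include <cassert>
#include <cstdint>

static int16_t abs_via_smax(int16_t X) {
  // sub(0, x), narrowed back to the lane width (two's-complement wrap).
  int16_t Neg = static_cast<int16_t>(0 - X);
  // smax(x, sub(0, x))
  return std::max(X, Neg);
}

int main() {
  assert(abs_via_smax(-7) == 7);
  assert(abs_via_smax(7) == 7);
  // The most negative lane value maps to itself, exactly as it does with the
  // sra/add/xor expansion, so the two lowerings agree on every input.
  assert(abs_via_smax(INT16_MIN) == INT16_MIN);
  return 0;
}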