diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ddb91adf61640..7f14ce8426174 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16611,27 +16611,30 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { /// copysign(x, fp_extend(y)) -> copysign(x, y) /// copysign(x, fp_round(y)) -> copysign(x, y) -static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { - SDValue N1 = N->getOperand(1); - if ((N1.getOpcode() == ISD::FP_EXTEND || - N1.getOpcode() == ISD::FP_ROUND)) { - EVT N1VT = N1->getValueType(0); - EVT N1Op0VT = N1->getOperand(0).getValueType(); +/// Operands to the functions are the type of X and Y respectively. +static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) { + // Always fold no-op FP casts. + if (XTy == YTy) + return true; - // Always fold no-op FP casts. - if (N1VT == N1Op0VT) - return true; + // Do not optimize out type conversion of f128 type yet. + // For some targets like x86_64, configuration is changed to keep one f128 + // value in one SSE register, but instruction selection cannot handle + // FCOPYSIGN on SSE registers yet. + if (YTy == MVT::f128) + return false; - // Do not optimize out type conversion of f128 type yet. - // For some targets like x86_64, configuration is changed to keep one f128 - // value in one SSE register, but instruction selection cannot handle - // FCOPYSIGN on SSE registers yet. - if (N1Op0VT == MVT::f128) - return false; + return !YTy.isVector() || EnableVectorFCopySignExtendRound; +} - return !N1Op0VT.isVector() || EnableVectorFCopySignExtendRound; - } - return false; +static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { + SDValue N1 = N->getOperand(1); + if (N1.getOpcode() != ISD::FP_EXTEND && + N1.getOpcode() != ISD::FP_ROUND) + return false; + EVT N1VT = N1->getValueType(0); + EVT N1Op0VT = N1->getOperand(0).getValueType(); + return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT); } SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { @@ -16991,7 +16994,13 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { } // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) - if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse()) { + // Note: From a legality perspective, this is a two step transform. First, + // we duplicate the fp_round to the arguments of the copysign, then we + // eliminate the fp_round on Y. The second step requires an additional + // predicate to match the implementation above. + if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() && + CanCombineFCOPYSIGN_EXTEND_ROUND(VT, + N0.getValueType())) { SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorklist(Tmp.getNode()); diff --git a/llvm/test/CodeGen/AArch64/sve-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-fcopysign.ll index aefcdbb46680b..65f1055ffafc0 100644 --- a/llvm/test/CodeGen/AArch64/sve-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/sve-fcopysign.ll @@ -259,30 +259,48 @@ define @test_copysign_v8f16_v8f32( %a, @test_copysign_nxv4f32_nxv4f16( %a, %b) #0 { -; CHECK-LABEL: test_copysign_nxv4f32_nxv4f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fcvt z0.h, p0/m, z0.s -; CHECK-NEXT: fcvt z1.h, p0/m, z1.s -; CHECK-NEXT: and z1.h, z1.h, #0x8000 -; CHECK-NEXT: and z0.h, z0.h, #0x7fff -; CHECK-NEXT: orr z0.d, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv4f32_nxv4f16: +; CHECK-NO-EXTEND-ROUND: // %bb.0: +; CHECK-NO-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000 +; CHECK-NO-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff +; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.s +; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z0.h, p0/m, z0.s +; CHECK-NO-EXTEND-ROUND-NEXT: ret +; +; CHECK-EXTEND-ROUND-LABEL: test_copysign_nxv4f32_nxv4f16: +; CHECK-EXTEND-ROUND: // %bb.0: +; CHECK-EXTEND-ROUND-NEXT: ptrue p0.s +; CHECK-EXTEND-ROUND-NEXT: fcvt z0.h, p0/m, z0.s +; CHECK-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.s +; CHECK-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000 +; CHECK-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff +; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d +; CHECK-EXTEND-ROUND-NEXT: ret %t1 = call @llvm.copysign.v4f32( %a, %b) %t2 = fptrunc %t1 to ret %t2 } define @test_copysign_nxv2f64_nxv2f32( %a, %b) #0 { -; CHECK-LABEL: test_copysign_nxv2f64_nxv2f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: fcvt z0.s, p0/m, z0.d -; CHECK-NEXT: fcvt z1.s, p0/m, z1.d -; CHECK-NEXT: and z1.s, z1.s, #0x80000000 -; CHECK-NEXT: and z0.s, z0.s, #0x7fffffff -; CHECK-NEXT: orr z0.d, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv2f64_nxv2f32: +; CHECK-NO-EXTEND-ROUND: // %bb.0: +; CHECK-NO-EXTEND-ROUND-NEXT: and z1.d, z1.d, #0x8000000000000000 +; CHECK-NO-EXTEND-ROUND-NEXT: and z0.d, z0.d, #0x7fffffffffffffff +; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.d +; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z0.s, p0/m, z0.d +; CHECK-NO-EXTEND-ROUND-NEXT: ret +; +; CHECK-EXTEND-ROUND-LABEL: test_copysign_nxv2f64_nxv2f32: +; CHECK-EXTEND-ROUND: // %bb.0: +; CHECK-EXTEND-ROUND-NEXT: ptrue p0.d +; CHECK-EXTEND-ROUND-NEXT: fcvt z0.s, p0/m, z0.d +; CHECK-EXTEND-ROUND-NEXT: fcvt z1.s, p0/m, z1.d +; CHECK-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000 +; CHECK-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff +; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d +; CHECK-EXTEND-ROUND-NEXT: ret %t1 = call @llvm.copysign.v2f64( %a, %b) %t2 = fptrunc %t1 to ret %t2 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll index eb4e5954515f6..0f4ace490480a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll @@ -1431,3 +1431,16 @@ define @vfcopynsign_exttrunc_vf_nxv8f64_nxv8f32( @llvm.copysign.nxv8f64( %vm, %eneg) ret %r } + +define @fptrunc_of_copysign_nxv2f32_nxv2f64( %X, %Y) { +; CHECK-LABEL: fptrunc_of_copysign_nxv2f32_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vfsgnj.vv v10, v8, v10 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vfncvt.f.f.w v8, v10 +; CHECK-NEXT: ret + %copy = call fast @llvm.copysign.nxv2f64( %X, %Y) + %trunc = fptrunc %copy to + ret %trunc +}