Skip to content

Commit

Permalink
DAG: Fix incorrect folding of fmul -1 to fneg
Browse files Browse the repository at this point in the history
The fmul is a canonicalizing operation and fneg is not, so this fold would
break denormals that need flushing and would also fail to quiet signaling
NaNs. Fold to fsub instead, which is also canonicalizing.
  • Loading branch information
arsenm committed Sep 15, 2021
1 parent 299b5d4 commit 54d755a
Show file tree
Hide file tree
Showing 7 changed files with 77 additions and 17 deletions.
11 changes: 7 additions & 4 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Expand Up @@ -14004,10 +14004,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, DL, VT, N0, N0);

// fold (fmul X, -1.0) -> (fneg X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT, N0);
// fold (fmul X, -1.0) -> (fsub -0.0, X)
if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
return DAG.getNode(ISD::FSUB, DL, VT,
DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
}
}

// -N0 * -N1 --> N0 * N1
TargetLowering::NegatibleCost CostN0 =
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AArch64/arm64-fmadd.ll
Expand Up @@ -82,7 +82,7 @@ define double @fms64(double %a, double %b, double %c) nounwind readnone ssp {
; CHECK-NEXT: fmsub d0, d0, d1, d2
; CHECK-NEXT: ret
entry:
%mul = fmul double %b, -1.000000e+00
%mul = fneg double %b
%0 = tail call double @llvm.fma.f64(double %a, double %mul, double %c)
ret double %0
}
Expand All @@ -93,7 +93,7 @@ define double @fms64_com(double %a, double %b, double %c) nounwind readnone ssp
; CHECK-NEXT: fmsub d0, d1, d0, d2
; CHECK-NEXT: ret
entry:
%mul = fmul double %b, -1.000000e+00
%mul = fneg double %b
%0 = tail call double @llvm.fma.f64(double %mul, double %a, double %c)
ret double %0
}
Expand All @@ -104,7 +104,7 @@ define double @fnms64(double %a, double %b, double %c) nounwind readnone ssp {
; CHECK-NEXT: fnmsub d0, d0, d1, d2
; CHECK-NEXT: ret
entry:
%mul = fmul double %c, -1.000000e+00
%mul = fneg double %c
%0 = tail call double @llvm.fma.f64(double %a, double %b, double %mul)
ret double %0
}
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll
Expand Up @@ -22,7 +22,7 @@ define half @fms16(half %a, half %b, half %c) nounwind readnone ssp {
; CHECK-LABEL: fms16:
; CHECK: fmsub h0, h0, h1, h2
entry:
%mul = fmul half %b, -1.000000e+00
%mul = fneg half %b
%0 = tail call half @llvm.fma.f16(half %a, half %mul, half %c)
ret half %0
}
Expand All @@ -32,7 +32,7 @@ define half @fms16_com(half %a, half %b, half %c) nounwind readnone ssp {
; CHECK: fmsub h0, h1, h0, h2
; CHECK-NEXT: ret
entry:
%mul = fmul half %b, -1.000000e+00
%mul = fneg half %b
%0 = tail call half @llvm.fma.f16(half %mul, half %a, half %c)
ret half %0
}
Expand All @@ -42,7 +42,7 @@ define half @fnms16(half %a, half %b, half %c) nounwind readnone ssp {
; CHECK: fnmsub h0, h0, h1, h2
; CHECK-NEXT: ret
entry:
%mul = fmul half %c, -1.000000e+00
%mul = fneg half %c
%0 = tail call half @llvm.fma.f16(half %a, half %b, half %mul)
ret half %0
}
Expand Down
45 changes: 45 additions & 0 deletions llvm/test/CodeGen/AMDGPU/fneg-combines.ll
Expand Up @@ -2597,6 +2597,51 @@ bb:
ret <2 x float> %i6
}

; The fmul by -1.0 carries no fast-math flags here; only its user (the
; second fmul) is marked nnan.
; This expects denormal flushing (presumably requested through attribute
; group #0, which is defined outside this hunk -- TODO confirm), so we
; can't turn this fmul into fneg. The expected output instead negates via
; a subtract from 0x80000000 (the bit pattern of -0.0f) before the multiply.
; TODO: Keeping this as fmul saves encoding size
; GCN-LABEL: {{^}}nnan_fmul_neg1_to_fneg:
; GCN: v_sub_f32_e32 [[TMP:v[0-9]+]], 0x80000000, v0
; GCN-NEXT: v_mul_f32_e32 v0, [[TMP]], v1
define float @nnan_fmul_neg1_to_fneg(float %x, float %y) #0 {
%mul = fmul float %x, -1.0
%add = fmul nnan float %mul, %y
ret float %add
}

; It's legal to turn this fmul into an fneg since denormals are
; preserved and we know an sNaN can't happen from the nnan flag on the
; fmul by -1.0 itself. Unlike the #0 tests, this function has no
; attribute group attached.
; The expected output is a single multiply with the negation applied
; directly to its v0 operand.
; GCN-LABEL: {{^}}denormal_fmul_neg1_to_fneg:
; GCN: v_mul_f32_e64 v0, -v0, v1
; GCN-NEXT: s_setpc_b64
define float @denormal_fmul_neg1_to_fneg(float %x, float %y) {
%mul = fmul nnan float %x, -1.0
%add = fmul float %mul, %y
ret float %add
}

; We know the source of the fmul by -1.0 can't be an sNaN: it is the
; result of another fmul (%x * %x), which is itself a canonicalizing
; operation. No fast-math flags are needed on any of the instructions.
; The expected output applies the negation to an operand of that first
; multiply instead of emitting a separate negate.
; GCN-LABEL: {{^}}denorm_snan_fmul_neg1_to_fneg:
; GCN: v_mul_f32_e64 [[TMP:v[0-9]+]], v0, -v0
; GCN: v_mul_f32_e32 v0, [[TMP]], v1
; GCN-NEXT: s_setpc_b64
define float @denorm_snan_fmul_neg1_to_fneg(float %x, float %y) {
%canonical = fmul float %x, %x
%mul = fmul float %canonical, -1.0
%add = fmul float %mul, %y
ret float %add
}

; The input is first passed through llvm.canonicalize, then multiplied by
; -1.0 with no fast-math flags. The function uses attribute group #0
; (presumably the denormal-flushing mode; defined outside this hunk --
; TODO confirm).
; The expected output keeps the canonicalize as a multiply by 1.0 and
; performs the negation as a subtract from 0x80000000 (the bit pattern of
; -0.0f), i.e. the fmul by -1.0 is not reduced to a plain sign flip.
; GCN-LABEL: {{^}}flush_snan_fmul_neg1_to_fneg:
; GCN: v_mul_f32_e32 [[TMP0:v[0-9]+]], 1.0, v0
; GCN: v_sub_f32_e32 [[TMP1:v[0-9]+]], 0x80000000, [[TMP0]]
; GCN-NEXT: v_mul_f32_e32 v0, [[TMP1]], v1
define float @flush_snan_fmul_neg1_to_fneg(float %x, float %y) #0 {
%quiet = call float @llvm.canonicalize.f32(float %x)
%mul = fmul float %quiet, -1.0
%add = fmul float %mul, %y
ret float %add
}

declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fma.f32(float, float, float) #1
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/ARM/fnegs.ll
Expand Up @@ -49,7 +49,7 @@ entry:
define float @test2(float* %a) {
entry:
%0 = load float, float* %a, align 4 ; <float> [#uses=2]
%1 = fmul float -1.000000e+00, %0 ; <float> [#uses=2]
%1 = fneg float %0 ; <float> [#uses=2]
%2 = fpext float %1 to double ; <double> [#uses=1]
%3 = fcmp olt double %2, 1.234000e+00 ; <i1> [#uses=1]
%retval = select i1 %3, float %1, float %0 ; <float> [#uses=1]
Expand Down
16 changes: 14 additions & 2 deletions llvm/test/CodeGen/Hexagon/opt-fneg.ll
Expand Up @@ -3,6 +3,7 @@

define float @foo(float %x) nounwind {
entry:
; CHECK-LABEL: foo:
; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}},#31)
%x.addr = alloca float, align 4
store float %x, float* %x.addr, align 4
Expand All @@ -13,14 +14,25 @@ entry:

; Negation written as a subtract from -0.0. The check lines expect it to
; be selected as a toggle of bit 31 (the sign bit of a 32-bit float).
define float @bar(float %x) nounwind {
entry:
; CHECK-LABEL: bar:
; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}},#31)
%sub = fsub float -0.000000e+00, %x
ret float %sub
}

define float @baz(float %x) nounwind {
define float @baz0(float %x) nounwind {
entry:
; CHECK-LABEL: baz0:
; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}},#31)
%conv1 = fmul float %x, -1.000000e+00
%conv1 = fmul nnan float %x, -1.000000e+00
ret float %conv1
}

; The fmul by -1.0 has no fast-math flags, but its operand comes from an
; fadd marked nnan, so the value being negated is known not to be a NaN.
; The check lines expect the multiply to still be selected as a toggle of
; bit 31 (the sign bit of a 32-bit float).
define float @baz1(float %x) nounwind {
entry:
%not.nan = fadd nnan float %x, %x
; CHECK-LABEL: baz1:
; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}},#31)
%conv1 = fmul float %not.nan, -1.000000e+00
ret float %conv1
}
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/PowerPC/combine-fneg.ll
Expand Up @@ -13,10 +13,10 @@ define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) {
; CHECK-NEXT: xvredp 2, 0
; CHECK-NEXT: xxswapd 1, 1
; CHECK-NEXT: xxlor 3, 1, 1
; CHECK-NEXT: xvnmsubadp 3, 0, 2
; CHECK-NEXT: xvmaddadp 2, 2, 3
; CHECK-NEXT: xvnmsubadp 1, 0, 2
; CHECK-NEXT: xvnmaddadp 2, 2, 1
; CHECK-NEXT: xvmaddadp 3, 0, 2
; CHECK-NEXT: xvnmsubadp 2, 2, 3
; CHECK-NEXT: xvmaddadp 1, 0, 2
; CHECK-NEXT: xvmsubadp 2, 2, 1
; CHECK-NEXT: xvmuldp 34, 34, 2
; CHECK-NEXT: xvmuldp 35, 35, 2
; CHECK-NEXT: blr
Expand Down

0 comments on commit 54d755a

Please sign in to comment.