diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c81568672de3c..77df4b4598c48 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17983,8 +17983,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
 
   // (fsub A, 0) -> A
   if (N1CFP && N1CFP->isZero()) {
-    if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
-        Flags.hasNoSignedZeros()) {
+    if (!N1CFP->isNegative() || Flags.hasNoSignedZeros()) {
       return N0;
     }
   }
@@ -17997,8 +17996,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
 
   // (fsub -0.0, N1) -> -N1
   if (N0CFP && N0CFP->isZero()) {
-    if (N0CFP->isNegative() ||
-        (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
+    if (N0CFP->isNegative() || Flags.hasNoSignedZeros()) {
       // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
       // flushed to zero, unless all users treat denorms as zero (DAZ).
       // FIXME: This transform will change the sign of a NaN and the behavior
@@ -18014,8 +18012,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
     }
   }
 
-  if ((Options.NoSignedZerosFPMath ||
-       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
+  if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
       N1.getOpcode() == ISD::FADD) {
     // X - (X + Y) -> -Y
     if (N0 == N1->getOperand(0))
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
index 12e9888314fc1..aaea4f76ea49b 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
@@ -5015,7 +5015,7 @@ define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(ptr addrspace(1) %out
   %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
   %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
   %a = load volatile double, ptr addrspace(1) %a.gep
-  %fneg.a = fsub double -0.000000e+00, %a
+  %fneg.a = fsub nsz double -0.000000e+00, %a
   %fpround = fptrunc double %fneg.a to float
   %fneg = fneg float %fpround
   store float %fneg, ptr addrspace(1) %out.gep
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index c4ca79dc85312..3de6df211ac7c 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -4441,25 +4441,40 @@ define float @v_fneg_fabs_select_infloop_regression(float %arg, i1 %arg1) {
   ret float %i3
 }
 
-define float @v_fmul_0_fsub_0_infloop_regression(float %arg) {
-; GCN-SAFE-LABEL: v_fmul_0_fsub_0_infloop_regression:
+define float @v_fmul_0_fsub_0_safe_infloop_regression(float %arg) {
+; GCN-SAFE-LABEL: v_fmul_0_fsub_0_safe_infloop_regression:
 ; GCN-SAFE:       ; %bb.0: ; %bb
 ; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-SAFE-NEXT:    v_mul_f32_e32 v0, 0, v0
 ; GCN-SAFE-NEXT:    v_sub_f32_e32 v0, 0, v0
 ; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GCN-NSZ-LABEL: v_fmul_0_fsub_0_infloop_regression:
-; GCN-NSZ:       ; %bb.0: ; %bb
-; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NSZ-NEXT:    v_mul_f32_e32 v0, 0x80000000, v0
-; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
+; SI-NSZ-LABEL: v_fmul_0_fsub_0_safe_infloop_regression:
+; SI-NSZ:       ; %bb.0: ; %bb
+; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NSZ-NEXT:    s_brev_b32 s4, 1
+; SI-NSZ-NEXT:    v_fma_f32 v0, v0, s4, 0
+; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
+; FIXME: utils/update_llc_test_checks.py will generate redundant VI
+; labels; remove them, as they will cause test failures.
 bb:
   %i = fmul float %arg, 0.0
   %i1 = fsub float 0.0, %i
   ret float %i1
 }
 
+define float @v_fmul_0_fsub_0_nsz_infloop_regression(float %arg) {
+; GCN-LABEL: v_fmul_0_fsub_0_nsz_infloop_regression:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mul_f32_e32 v0, 0x80000000, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+bb:
+  %i = fmul float %arg, 0.0
+  %i1 = fsub nsz float 0.0, %i
+  ret float %i1
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 declare float @llvm.fma.f32(float, float, float) #1
 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
diff --git a/llvm/test/CodeGen/X86/vec_unsafe-fp-math.ll b/llvm/test/CodeGen/X86/vec_unsafe-fp-math.ll
index 23d22e75d1e9d..3f92d2b79c85d 100644
--- a/llvm/test/CodeGen/X86/vec_unsafe-fp-math.ll
+++ b/llvm/test/CodeGen/X86/vec_unsafe-fp-math.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -enable-unsafe-fp-math -enable-no-signed-zeros-fp-math -mtriple=x86_64-unknown-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
 
 ; Make sure that vectors get the same benefits as scalars when using unsafe-fp-math.
 
@@ -18,7 +18,7 @@ define <4 x float> @vec_fneg(<4 x float> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %sub = fsub <4 x float> zeroinitializer, %x
+  %sub = fsub nsz <4 x float> zeroinitializer, %x
   ret <4 x float> %sub
 }
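
A minimal IR sketch of the behavioral change, not part of the patch (the
function names @fsub_zero and @fsub_zero_nsz are invented for illustration).
With Options.NoSignedZerosFPMath no longer consulted in visitFSUB, the
(fsub 0.0, X) -> (fneg X) fold is driven purely by the per-instruction 'nsz'
fast-math flag, so the global llc option -enable-no-signed-zeros-fp-math by
itself no longer enables it:

; Not foldable to fneg: if %x is +0.0 the exact result is +0.0,
; while fneg would produce -0.0, so a real subtract must be kept.
define float @fsub_zero(float %x) {
  %r = fsub float 0.0, %x
  ret float %r
}

; Foldable: 'nsz' says the sign of a zero result does not matter,
; so DAGCombiner may lower this to a plain sign-bit flip (fneg),
; e.g. a single xorps against the sign-bit mask on x86_64.
define float @fsub_zero_nsz(float %x) {
  %r = fsub nsz float 0.0, %x
  ret float %r
}

The same reasoning applies to the (fsub A, 0) -> A fold when the constant is
-0.0, and to the X - (X + Y) -> -Y fold, which now additionally requires the
'reassoc' flag on the fsub.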