Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17983,8 +17983,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {

// (fsub A, 0) -> A
if (N1CFP && N1CFP->isZero()) {
if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
Flags.hasNoSignedZeros()) {
if (!N1CFP->isNegative() || Flags.hasNoSignedZeros()) {
return N0;
}
}
Expand All @@ -17997,8 +17996,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {

// (fsub -0.0, N1) -> -N1
if (N0CFP && N0CFP->isZero()) {
if (N0CFP->isNegative() ||
(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
if (N0CFP->isNegative() || Flags.hasNoSignedZeros()) {
// We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
// flushed to zero, unless all users treat denorms as zero (DAZ).
// FIXME: This transform will change the sign of a NaN and the behavior
Expand All @@ -18014,8 +18012,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
}

if ((Options.NoSignedZerosFPMath ||
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
N1.getOpcode() == ISD::FADD) {
// X - (X + Y) -> -Y
if (N0 == N1->getOperand(0))
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/fneg-combines.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5015,7 +5015,7 @@ define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(ptr addrspace(1) %out
%a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
%a = load volatile double, ptr addrspace(1) %a.gep
%fneg.a = fsub double -0.000000e+00, %a
%fneg.a = fsub nsz double -0.000000e+00, %a
%fpround = fptrunc double %fneg.a to float
%fneg = fneg float %fpround
store float %fneg, ptr addrspace(1) %out.gep
Expand Down
29 changes: 22 additions & 7 deletions llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4441,25 +4441,40 @@ define float @v_fneg_fabs_select_infloop_regression(float %arg, i1 %arg1) {
ret float %i3
}

define float @v_fmul_0_fsub_0_infloop_regression(float %arg) {
; GCN-SAFE-LABEL: v_fmul_0_fsub_0_infloop_regression:
define float @v_fmul_0_fsub_0_safe_infloop_regression(float %arg) {
; GCN-SAFE-LABEL: v_fmul_0_fsub_0_safe_infloop_regression:
; GCN-SAFE: ; %bb.0: ; %bb
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-SAFE-NEXT: v_mul_f32_e32 v0, 0, v0
; GCN-SAFE-NEXT: v_sub_f32_e32 v0, 0, v0
; GCN-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GCN-NSZ-LABEL: v_fmul_0_fsub_0_infloop_regression:
; GCN-NSZ: ; %bb.0: ; %bb
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NSZ-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
; GCN-NSZ-NEXT: s_setpc_b64 s[30:31]
; SI-NSZ-LABEL: v_fmul_0_fsub_0_safe_infloop_regression:
; SI-NSZ: ; %bb.0: ; %bb
; SI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NSZ-NEXT: s_brev_b32 s4, 1
; SI-NSZ-NEXT: v_fma_f32 v0, v0, s4, 0
; SI-NSZ-NEXT: s_setpc_b64 s[30:31]
; FIXME: utils/update_llc_test_checks.py generates redundant VI check
; labels for this function; remove them by hand, or the test will fail.
bb:
%i = fmul float %arg, 0.0
%i1 = fsub float 0.0, %i
ret float %i1
}

; nsz variant of the fmul-by-zero / fsub-from-zero test: the fsub below carries
; the per-instruction `nsz` fast-math flag. The shared GCN checks expect the
; whole sequence to be emitted as a single v_mul_f32 by 0x80000000 (the f32 bit
; pattern of -0.0), with no separate subtract.
; NOTE(review): the "infloop_regression" suffix suggests this guards against a
; DAG-combine infinite loop -- confirm against the original bug report.
define float @v_fmul_0_fsub_0_nsz_infloop_regression(float %arg) {
; GCN-LABEL: v_fmul_0_fsub_0_nsz_infloop_regression:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
%i = fmul float %arg, 0.0
; The `nsz` flag is on the instruction itself rather than coming from a global
; llc option.
%i1 = fsub nsz float 0.0, %i
ret float %i1
}

declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fma.f32(float, float, float) #1
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vec_unsafe-fp-math.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -enable-unsafe-fp-math -enable-no-signed-zeros-fp-math -mtriple=x86_64-unknown-unknown | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s

; Make sure that vectors get the same benefits as scalars when using unsafe-fp-math.

Expand All @@ -18,7 +18,7 @@ define <4 x float> @vec_fneg(<4 x float> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: retq
%sub = fsub <4 x float> zeroinitializer, %x
%sub = fsub nsz <4 x float> zeroinitializer, %x
ret <4 x float> %sub
}