Skip to content

Commit

Permalink
[FPEnv] Don't transform FSUB(-0,X)->FNEG(X) in SelectionDAGBuilder.
Browse files Browse the repository at this point in the history
This patch stops unconditionally transforming FSUB(-0,X) into FNEG(X) while building the DAG. There is also one small change to handle the new FSUB(-0,X) pattern similarly to FNEG(X) in the AMDGPU backend.

Differential Revision: https://reviews.llvm.org/D84056
  • Loading branch information
Cameron McInally committed Aug 3, 2020
1 parent 10851f9 commit 31c7a2f
Show file tree
Hide file tree
Showing 12 changed files with 155 additions and 162 deletions.
14 changes: 0 additions & 14 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3005,20 +3005,6 @@ void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
}

// Lower an IR 'fsub' instruction into the SelectionDAG.
//
// NOTE(review): this is the pre-patch version, which unconditionally folds
// FSUB(-0.0, X) into FNEG(X) at DAG-build time. The enclosing commit deletes
// this special case (see the commit message: the fold is no longer done
// unconditionally while building the DAG) — presumably because FNEG(X) and
// FSUB(-0.0, X) are not interchangeable in all FP environments; confirm
// against the review (D84056) and the LangRef semantics of 'fneg'.
void SelectionDAGBuilder::visitFSub(const User &I) {
// -0.0 - X --> fneg
Type *Ty = I.getType();
// Only fire when operand 0 is literally the negative-zero constant for this
// type (scalar or vector), i.e. the IR idiom historically used to spell fneg.
if (isa<Constant>(I.getOperand(0)) &&
I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
SDValue Op2 = getValue(I.getOperand(1));
// Emit a single FNEG node on the subtrahend instead of a two-operand FSUB.
setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(),
Op2.getValueType(), Op2));
return;
}

// Fallthrough: ordinary binary lowering to ISD::FSUB.
visitBinary(I, ISD::FSUB);
}

void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ class SelectionDAGBuilder {
void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); }
void visitSub(const User &I) { visitBinary(I, ISD::SUB); }
void visitFSub(const User &I);
void visitFSub(const User &I) { visitBinary(I, ISD::FSUB); }
void visitMul(const User &I) { visitBinary(I, ISD::MUL); }
void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); }
void visitURem(const User &I) { visitBinary(I, ISD::UREM); }
Expand Down
11 changes: 9 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3795,8 +3795,15 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags());
if (Res.getOpcode() != AMDGPUISD::FMED3)
return SDValue(); // Op got folded away.
if (!N0.hasOneUse())
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));

if (!N0.hasOneUse()) {
SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Res);
DAG.ReplaceAllUsesWith(N0, Neg);

for (SDNode *U : Neg->uses())
DCI.AddToWorklist(U);
}

return Res;
}
case ISD::FP_EXTEND:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(float addrsp
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
%load = load float, float addrspace(1)* %gep, align 4
%v = fsub float -0.0, %load
%v = fneg float %load
%canonicalized = tail call float @llvm.canonicalize.f32(float %v)
store float %canonicalized, float addrspace(1)* %gep, align 4
ret void
Expand All @@ -327,7 +327,7 @@ define amdgpu_kernel void @test_fold_canonicalize_fneg_value_f32(float addrspace
%gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
%load = load float, float addrspace(1)* %gep, align 4
%v0 = fadd float %load, 0.0
%v = fsub float -0.0, %v0
%v = fneg float %v0
%canonicalized = tail call float @llvm.canonicalize.f32(float %v)
store float %canonicalized, float addrspace(1)* %gep, align 4
ret void
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_f16(half addrspace(1)* %
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #1 {
%val = load half, half addrspace(1)* %out
%val.fabs = call half @llvm.fabs.f16(half %val)
%val.fabs.fneg = fsub half -0.0, %val.fabs
%val.fabs.fneg = fneg half %val.fabs
%canonicalized = call half @llvm.canonicalize.f16(half %val.fabs.fneg)
store half %canonicalized, half addrspace(1)* %out
ret void
Expand All @@ -91,7 +91,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(half addrspace(
; CI: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(half addrspace(1)* %out) #1 {
%val = load half, half addrspace(1)* %out
%val.fneg = fsub half -0.0, %val
%val.fneg = fneg half %val
%canonicalized = call half @llvm.canonicalize.f16(half %val.fneg)
store half %canonicalized, half addrspace(1)* %out
ret void
Expand All @@ -103,7 +103,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(half addrspace(1)* %
; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(half addrspace(1)* %out) #2 {
%val = load half, half addrspace(1)* %out
%val.fneg = fsub half -0.0, %val
%val.fneg = fneg half %val
%canonicalized = call half @llvm.canonicalize.f16(half %val.fneg)
store half %canonicalized, half addrspace(1)* %out
ret void
Expand All @@ -120,7 +120,7 @@ define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(half ad
define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #2 {
%val = load half, half addrspace(1)* %out
%val.fabs = call half @llvm.fabs.f16(half %val)
%val.fabs.fneg = fsub half -0.0, %val.fabs
%val.fabs.fneg = fneg half %val.fabs
%canonicalized = call half @llvm.canonicalize.f16(half %val.fabs.fneg)
store half %canonicalized, half addrspace(1)* %out
ret void
Expand Down Expand Up @@ -323,7 +323,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> ad
%gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
%val = load <2 x half>, <2 x half> addrspace(1)* %gep
%val.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
%val.fabs.fneg = fsub <2 x half> <half -0.0, half -0.0>, %val.fabs
%val.fabs.fneg = fneg <2 x half> %val.fabs
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val.fabs.fneg)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
ret void
Expand All @@ -340,7 +340,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspa
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
%val = load <2 x half>, <2 x half> addrspace(1)* %gep
%fneg.val = fsub <2 x half> <half -0.0, half -0.0>, %val
%fneg.val = fneg <2 x half> %val
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %fneg.val)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
ret void
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(float addrspace(1)*
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace(1)* %out) #1 {
%val = load float, float addrspace(1)* %out
%val.fabs = call float @llvm.fabs.f32(float %val)
%val.fabs.fneg = fsub float -0.0, %val.fabs
%val.fabs.fneg = fneg float %val.fabs
%canonicalized = call float @llvm.canonicalize.f32(float %val.fabs.fneg)
store float %canonicalized, float addrspace(1)* %out
ret void
Expand All @@ -69,7 +69,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)* %out) #1 {
%val = load float, float addrspace(1)* %out
%val.fneg = fsub float -0.0, %val
%val.fneg = fneg float %val
%canonicalized = call float @llvm.canonicalize.f32(float %val.fneg)
store float %canonicalized, float addrspace(1)* %out
ret void
Expand Down Expand Up @@ -264,7 +264,7 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_f64(double addrspace(1)*
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(double addrspace(1)* %out) #1 {
%val = load double, double addrspace(1)* %out
%val.fabs = call double @llvm.fabs.f64(double %val)
%val.fabs.fneg = fsub double -0.0, %val.fabs
%val.fabs.fneg = fneg double %val.fabs
%canonicalized = call double @llvm.canonicalize.f64(double %val.fabs.fneg)
store double %canonicalized, double addrspace(1)* %out
ret void
Expand All @@ -275,7 +275,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(double addrspac
; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(double addrspace(1)* %out) #1 {
%val = load double, double addrspace(1)* %out
%val.fneg = fsub double -0.0, %val
%val.fneg = fneg double %val
%canonicalized = call double @llvm.canonicalize.f64(double %val.fneg)
store double %canonicalized, double addrspace(1)* %out
ret void
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ define amdgpu_kernel void @div_v4_c_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
; GCN: global_store_dwordx4
define amdgpu_kernel void @div_v4_c_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
%load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
%neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %load
%neg = fneg <4 x float> %load
%div = fdiv <4 x float> <float 2.000000e+00, float 1.000000e+00, float -1.000000e+00, float -2.000000e+00>, %neg, !fpmath !0
store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
ret void
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f16(half addrspace(1)* %out,
%r1 = load volatile half, half addrspace(1)* %gep.0
%r2 = load volatile half, half addrspace(1)* %gep.1

%r1.fneg = fsub half -0.000000e+00, %r1
%r1.fneg = fneg half %r1

%r3 = tail call half @llvm.fmuladd.f16(half -2.0, half %r1.fneg, half %r2)
store half %r3, half addrspace(1)* %gep.out
Expand Down Expand Up @@ -287,7 +287,7 @@ define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f16(half addrspace(1)* %out, half
%r1 = load volatile half, half addrspace(1)* %gep.0
%r2 = load volatile half, half addrspace(1)* %gep.1

%r1.fneg = fsub half -0.000000e+00, %r1
%r1.fneg = fneg half %r1

%r3 = tail call half @llvm.fmuladd.f16(half 2.0, half %r1.fneg, half %r2)
store half %r3, half addrspace(1)* %gep.out
Expand All @@ -312,7 +312,7 @@ define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f16(half addrspace(1)* %out, half
%r1 = load volatile half, half addrspace(1)* %gep.0
%r2 = load volatile half, half addrspace(1)* %gep.1

%r2.fneg = fsub half -0.000000e+00, %r2
%r2.fneg = fneg half %r2

%r3 = tail call half @llvm.fmuladd.f16(half 2.0, half %r1, half %r2.fneg)
store half %r3, half addrspace(1)* %gep.out
Expand Down Expand Up @@ -494,8 +494,8 @@ define amdgpu_kernel void @neg_neg_mad_f16(half addrspace(1)* noalias nocapture
%a = load volatile half, half addrspace(1)* %gep0, align 2
%b = load volatile half, half addrspace(1)* %gep1, align 2
%c = load volatile half, half addrspace(1)* %gep2, align 2
%nega = fsub half -0.000000e+00, %a
%negb = fsub half -0.000000e+00, %b
%nega = fneg half %a
%negb = fneg half %b
%mul = fmul half %nega, %negb
%sub = fadd half %mul, %c
store half %sub, half addrspace(1)* %outgep, align 2
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out,
%r1 = load volatile float, float addrspace(1)* %gep.0
%r2 = load volatile float, float addrspace(1)* %gep.1

%r1.fneg = fsub float -0.000000e+00, %r1
%r1.fneg = fneg float %r1

%r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1.fneg, float %r2)
store float %r3, float addrspace(1)* %gep.out
Expand Down Expand Up @@ -307,7 +307,7 @@ define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, flo
%r1 = load volatile float, float addrspace(1)* %gep.0
%r2 = load volatile float, float addrspace(1)* %gep.1

%r1.fneg = fsub float -0.000000e+00, %r1
%r1.fneg = fneg float %r1

%r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1.fneg, float %r2)
store float %r3, float addrspace(1)* %gep.out
Expand Down Expand Up @@ -339,7 +339,7 @@ define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, flo
%r1 = load volatile float, float addrspace(1)* %gep.0
%r2 = load volatile float, float addrspace(1)* %gep.1

%r2.fneg = fsub float -0.000000e+00, %r2
%r2.fneg = fneg float %r2

%r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2.fneg)
store float %r3, float addrspace(1)* %gep.out
Expand Down Expand Up @@ -517,8 +517,8 @@ define amdgpu_kernel void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture
%a = load volatile float, float addrspace(1)* %gep0, align 4
%b = load volatile float, float addrspace(1)* %gep1, align 4
%c = load volatile float, float addrspace(1)* %gep2, align 4
%nega = fsub float -0.000000e+00, %a
%negb = fsub float -0.000000e+00, %b
%nega = fneg float %a
%negb = fneg float %b
%mul = fmul float %nega, %negb
%sub = fadd float %mul, %c
store float %sub, float addrspace(1)* %outgep, align 4
Expand Down
Loading

0 comments on commit 31c7a2f

Please sign in to comment.