diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 895ccefd020f1..50873a252e1e1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -848,9 +848,9 @@ bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const { bool AMDGPUTargetLowering::isFNegFree(EVT VT) const { assert(VT.isFloatingPoint()); - return VT == MVT::f32 || VT == MVT::f64 || - (Subtarget->has16BitInsts() && VT == MVT::f16) || - (Subtarget->hasVOP3PInsts() && VT == MVT::v2f16); + // Report this based on the final legalized type, so query the scalar type. + VT = VT.getScalarType(); + return VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f16; } bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(EVT MemVT, diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll index 2ed74ac1722f2..900d07c2d7ce4 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll @@ -2575,8 +2575,31 @@ define amdgpu_kernel void @multi_use_cost_to_fold_into_src(float addrspace(1)* % ret void } +; The AMDGPU combine to pull fneg into the FMA operands was being +; undone by the generic combine to pull the fneg out of the fma if +; !isFNegFree. We were reporting false for v2f32 even though it will +; be split into f32 where it will be free. 
+; GCN-LABEL: {{^}}fneg_fma_fneg_dagcombine_loop: +; GCN: s_brev_b32 [[NEGZERO:s[0-9]+]], 1{{$}} +; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], v2, -v4, [[NEGZERO]] +; GCN-DAG: v_fma_f32 [[FMA1:v[0-9]+]], v3, -v5, [[NEGZERO]] +; GCN-DAG: v_sub_f32_e32 [[SUB0:v[0-9]+]], [[FMA0]], v0 +; GCN-DAG: v_sub_f32_e32 [[SUB1:v[0-9]+]], [[FMA1]], v1 +; GCN-DAG: v_mul_f32_e32 v0, [[SUB0]], v4 +; GCN-DAG: v_mul_f32_e32 v1, [[SUB1]], v5 +; GCN: s_setpc_b64 +define <2 x float> @fneg_fma_fneg_dagcombine_loop(<2 x float> %arg, <2 x float> %arg1, <2 x float> %arg2) #0 { +bb: + %i3 = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %arg1, <2 x float> %arg2, <2 x float> zeroinitializer) + %i4 = fadd fast <2 x float> %i3, %arg + %i5 = fneg <2 x float> %i4 + %i6 = fmul fast <2 x float> %i5, %arg2 + ret <2 x float> %i6 +} + declare i32 @llvm.amdgcn.workitem.id.x() #1 declare float @llvm.fma.f32(float, float, float) #1 +declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) declare float @llvm.fmuladd.f32(float, float, float) #1 declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1 declare float @llvm.sin.f32(float) #1 @@ -2601,3 +2624,4 @@ declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind "unsafe-fp-math"="true" } +attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }