diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 4b029a28ead5b..c3c390738903a 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -499,6 +499,29 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, return true; } + switch (DefMI->getOpcode()) { + default: + break; + case TargetOpcode::G_FMINNUM_IEEE: + case TargetOpcode::G_FMAXNUM_IEEE: { + if (SNaN) + return true; + // This can return a NaN if either operand is an sNaN, or if both operands + // are NaN. + return (isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI) && + isKnownNeverSNaN(DefMI->getOperand(2).getReg(), MRI)) || + (isKnownNeverSNaN(DefMI->getOperand(1).getReg(), MRI) && + isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI)); + } + case TargetOpcode::G_FMINNUM: + case TargetOpcode::G_FMAXNUM: { + // Only one needs to be known not-nan, since it will be returned if the + // other ends up being one. + return isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI, SNaN) || + isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI, SNaN); + } + } + if (SNaN) { // FP operations quiet. For now, just handle the ones inserted during // legalization. diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll index d14389ed6fedc..2a72acb7a7d33 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll @@ -432,13 +432,7 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, ; SI-NEXT: v_add_f32_e32 v2, 1.0, v2 ; SI-NEXT: v_add_f32_e32 v3, 2.0, v3 ; SI-NEXT: v_add_f32_e32 v4, 4.0, v4 -; SI-NEXT: v_min_f32_e32 v5, v2, v3 -; SI-NEXT: v_max_f32_e32 v2, v2, v3 -; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2 -; SI-NEXT: v_min_f32_e32 v2, v2, v4 -; SI-NEXT: v_mul_f32_e32 v3, 1.0, v5 -; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2 -; SI-NEXT: v_max_f32_e32 v2, v3, v2 +; SI-NEXT: v_med3_f32 v2, v2, v3, v4 ; SI-NEXT: s_mov_b64 s[2:3], s[10:11] ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_endpgm @@ -472,14 +466,8 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: v_add_f32_e32 v4, 1.0, v7 ; VI-NEXT: v_add_f32_e32 v2, 2.0, v2 -; VI-NEXT: v_min_f32_e32 v5, v4, v2 -; VI-NEXT: v_max_f32_e32 v2, v4, v2 ; VI-NEXT: v_add_f32_e32 v3, 4.0, v3 -; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2 -; VI-NEXT: v_min_f32_e32 v2, v2, v3 -; VI-NEXT: v_mul_f32_e32 v3, 1.0, v5 -; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2 -; VI-NEXT: v_max_f32_e32 v2, v3, v2 +; VI-NEXT: v_med3_f32 v2, v4, v2, v3 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; @@ -496,14 +484,8 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1 ; GFX9-NEXT: v_add_f32_e32 v2, 2.0, v2 -; GFX9-NEXT: v_min_f32_e32 v4, v1, v2 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v2 ; GFX9-NEXT: v_add_f32_e32 v3, 4.0, v3 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-NEXT: v_min_f32_e32 v1, v1, v3 -; GFX9-NEXT: v_max_f32_e32 v2, v4, v4 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-NEXT: v_max_f32_e32 v1, v2, v1 +; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir index 2d915a213cb42..f5bcc2040fa01 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir @@ -745,9 +745,8 @@ body: | ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE1]], [[FCANONICALIZE2]] + ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) ; VI-LABEL: name: test_fmaxnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -755,9 +754,8 @@ body: | ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE1]], [[FCANONICALIZE2]] + ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) ; GFX9-LABEL: name: test_fmaxnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -765,9 +763,8 @@ body: | ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE1]], [[FCANONICALIZE2]] + ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FCONSTANT float 0.000000e+00 @@ -842,9 +839,8 @@ body: | ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE1]], [[FCANONICALIZE2]] + ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; VI-LABEL: name: test_fmaxnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -852,9 +848,8 @@ body: | ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE1]], [[FCANONICALIZE2]] + ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fmaxnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -862,9 +857,8 @@ body: | ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE1]], [[FCANONICALIZE2]] + ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FCONSTANT float 0.000000e+00 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir index 4c8f86577fc4a..298c7119a3c32 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir @@ -745,9 +745,8 @@ body: | ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE1]], [[FCANONICALIZE2]] + ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] ; SI: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) ; VI-LABEL: name: test_fminnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -755,9 +754,8 @@ body: | ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE1]], [[FCANONICALIZE2]] + ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] ; VI: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) ; GFX9-LABEL: name: test_fminnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -765,9 +763,8 @@ body: | ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE1]], [[FCANONICALIZE2]] + ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FCONSTANT float 0.000000e+00 @@ -842,9 +839,8 @@ body: | ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE1]], [[FCANONICALIZE2]] + ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; VI-LABEL: name: test_fminnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -852,9 +848,8 @@ body: | ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE1]], [[FCANONICALIZE2]] + ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fminnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -862,9 +857,8 @@ body: | ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE1]], [[FCANONICALIZE2]] + ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FCONSTANT float 0.000000e+00