-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
APFloat: Fix signed zero handling in minnum/maxnum #83376
Conversation
Follow the 2019 rules and order -0 as less than +0 and +0 as greater than -0. As currently defined this isn't required for the intrinsics, but is a better QoI. This will avoid the workaround in libc added by llvm#83158
@llvm/pr-subscribers-llvm-adt @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesFollow the 2019 rules and order -0 as less than +0 and +0 as greater than -0. As currently defined this isn't required for the intrinsics, but is a better QoI. This will avoid the workaround in libc added by #83158 Full diff: https://github.com/llvm/llvm-project/pull/83376.diff 7 Files Affected:
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 8c247bbcec90a2..598d00f592f8c0 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -1397,6 +1397,8 @@ inline APFloat minnum(const APFloat &A, const APFloat &B) {
return B;
if (B.isNaN())
return A;
+ if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative()))
+ return A.isNegative() ? A : B;
return B < A ? B : A;
}
@@ -1408,6 +1410,8 @@ inline APFloat maxnum(const APFloat &A, const APFloat &B) {
return B;
if (B.isNaN())
return A;
+ if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative()))
+ return A.isNegative() ? B : A;
return A < B ? B : A;
}
diff --git a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
index 09898f1442fb82..38640a18b5aee6 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
@@ -152,7 +152,7 @@ define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(ptr addrspace(1) %out) #
; GCN-LABEL: {{^}}constant_fold_fmax_f32_n0_p0:
; GCN-NOT: v_max_f32_e32
-; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(ptr addrspace(1) %out) #0 {
%val = call float @llvm.maxnum.f32(float -0.0, float 0.0)
diff --git a/llvm/test/CodeGen/AMDGPU/fminnum.ll b/llvm/test/CodeGen/AMDGPU/fminnum.ll
index 844d26a6225b40..65b311845a6b77 100644
--- a/llvm/test/CodeGen/AMDGPU/fminnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminnum.ll
@@ -150,7 +150,7 @@ define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(ptr addrspace(1) %out) #
; GCN-LABEL: {{^}}constant_fold_fmin_f32_p0_n0:
; GCN-NOT: v_min_f32_e32
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(ptr addrspace(1) %out) #0 {
%val = call float @llvm.minnum.f32(float 0.0, float -0.0)
diff --git a/llvm/test/Transforms/InstCombine/maxnum.ll b/llvm/test/Transforms/InstCombine/maxnum.ll
index 87288b18cbcd9f..e140a5b405ea84 100644
--- a/llvm/test/Transforms/InstCombine/maxnum.ll
+++ b/llvm/test/Transforms/InstCombine/maxnum.ll
@@ -66,7 +66,7 @@ define float @constant_fold_maxnum_f32_p0_n0() {
define float @constant_fold_maxnum_f32_n0_p0() {
; CHECK-LABEL: @constant_fold_maxnum_f32_n0_p0(
-; CHECK-NEXT: ret float -0.000000e+00
+; CHECK-NEXT: ret float 0.000000e+00
;
%x = call float @llvm.maxnum.f32(float -0.0, float 0.0)
ret float %x
diff --git a/llvm/test/Transforms/InstCombine/minnum.ll b/llvm/test/Transforms/InstCombine/minnum.ll
index 8050f075595272..cc6171b9d8e6cb 100644
--- a/llvm/test/Transforms/InstCombine/minnum.ll
+++ b/llvm/test/Transforms/InstCombine/minnum.ll
@@ -60,7 +60,7 @@ define float @constant_fold_minnum_f32_p0_p0() {
define float @constant_fold_minnum_f32_p0_n0() {
; CHECK-LABEL: @constant_fold_minnum_f32_p0_n0(
-; CHECK-NEXT: ret float 0.000000e+00
+; CHECK-NEXT: ret float -0.000000e+00
;
%x = call float @llvm.minnum.f32(float 0.0, float -0.0)
ret float %x
@@ -199,7 +199,7 @@ define float @minnum_f32_1_minnum_p0_val_fmf3(float %x) {
define float @minnum_f32_p0_minnum_val_n0(float %x) {
; CHECK-LABEL: @minnum_f32_p0_minnum_val_n0(
-; CHECK-NEXT: [[Z:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 0.000000e+00)
+; CHECK-NEXT: [[Z:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float -0.000000e+00)
; CHECK-NEXT: ret float [[Z]]
;
%y = call float @llvm.minnum.f32(float %x, float -0.0)
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/min-max.ll b/llvm/test/Transforms/InstSimplify/ConstProp/min-max.ll
index a5f5d4e12ed842..9120649eb5c4f1 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/min-max.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/min-max.ll
@@ -49,6 +49,38 @@ define float @minnum_float() {
ret float %1
}
+define float @minnum_float_p0_n0() {
+; CHECK-LABEL: @minnum_float_p0_n0(
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %min = call float @llvm.minnum.f32(float 0.0, float -0.0)
+ ret float %min
+}
+
+define float @minnum_float_n0_p0() {
+; CHECK-LABEL: @minnum_float_n0_p0(
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %min = call float @llvm.minnum.f32(float -0.0, float 0.0)
+ ret float %min
+}
+
+define float @minnum_float_p0_qnan() {
+; CHECK-LABEL: @minnum_float_p0_qnan(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %min = call float @llvm.minnum.f32(float 0.0, float 0x7FF8000000000000)
+ ret float %min
+}
+
+define float @minnum_float_qnan_p0() {
+; CHECK-LABEL: @minnum_float_qnan_p0(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %min = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0.0)
+ ret float %min
+}
+
define bfloat @minnum_bfloat() {
; CHECK-LABEL: @minnum_bfloat(
; CHECK-NEXT: ret bfloat 0xR40A0
@@ -95,7 +127,7 @@ define <4 x half> @minnum_half_vec() {
define <4 x float> @minnum_float_zeros_vec() {
; CHECK-LABEL: @minnum_float_zeros_vec(
-; CHECK-NEXT: ret <4 x float> <float 0.000000e+00, float -0.000000e+00, float 0.000000e+00, float -0.000000e+00>
+; CHECK-NEXT: ret <4 x float> <float 0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
;
%1 = call <4 x float> @llvm.minnum.v4f32(<4 x float> <float 0.0, float -0.0, float 0.0, float -0.0>, <4 x float> <float 0.0, float 0.0, float -0.0, float -0.0>)
ret <4 x float> %1
@@ -109,6 +141,38 @@ define float @maxnum_float() {
ret float %1
}
+define float @maxnum_float_p0_n0() {
+; CHECK-LABEL: @maxnum_float_p0_n0(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %max = call float @llvm.maxnum.f32(float 0.0, float -0.0)
+ ret float %max
+}
+
+define float @maxnum_float_n0_p0() {
+; CHECK-LABEL: @maxnum_float_n0_p0(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %max = call float @llvm.maxnum.f32(float -0.0, float 0.0)
+ ret float %max
+}
+
+define float @maxnum_float_p0_qnan() {
+; CHECK-LABEL: @maxnum_float_p0_qnan(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %max = call float @llvm.maxnum.f32(float 0.0, float 0x7FF8000000000000)
+ ret float %max
+}
+
+define float @maxnum_float_qnan_p0() {
+; CHECK-LABEL: @maxnum_float_qnan_p0(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %max = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0.0)
+ ret float %max
+}
+
define bfloat @maxnum_bfloat() {
; CHECK-LABEL: @maxnum_bfloat(
; CHECK-NEXT: ret bfloat 0xR4228
@@ -155,7 +219,7 @@ define <4 x half> @maxnum_half_vec() {
define <4 x float> @maxnum_float_zeros_vec() {
; CHECK-LABEL: @maxnum_float_zeros_vec(
-; CHECK-NEXT: ret <4 x float> <float 0.000000e+00, float -0.000000e+00, float 0.000000e+00, float -0.000000e+00>
+; CHECK-NEXT: ret <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float -0.000000e+00>
;
%1 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> <float 0.0, float -0.0, float 0.0, float -0.0>, <4 x float> <float 0.0, float 0.0, float -0.0, float -0.0>)
ret <4 x float> %1
diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index baf055e503b7e7..6e4dda8351a1b1 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -578,6 +578,11 @@ TEST(APFloatTest, MinNum) {
EXPECT_EQ(1.0, minnum(f2, f1).convertToDouble());
EXPECT_EQ(1.0, minnum(f1, nan).convertToDouble());
EXPECT_EQ(1.0, minnum(nan, f1).convertToDouble());
+
+ APFloat zp(0.0);
+ APFloat zn(-0.0);
+ EXPECT_EQ(-0.0, minnum(zp, zn).convertToDouble());
+ EXPECT_EQ(-0.0, minnum(zn, zp).convertToDouble());
}
TEST(APFloatTest, MaxNum) {
@@ -589,6 +594,11 @@ TEST(APFloatTest, MaxNum) {
EXPECT_EQ(2.0, maxnum(f2, f1).convertToDouble());
EXPECT_EQ(1.0, maxnum(f1, nan).convertToDouble());
EXPECT_EQ(1.0, maxnum(nan, f1).convertToDouble());
+
+ APFloat zp(0.0);
+ APFloat zn(-0.0);
+ EXPECT_EQ(0.0, maxnum(zp, zn).convertToDouble());
+ EXPECT_EQ(0.0, maxnum(zn, zp).convertToDouble());
}
TEST(APFloatTest, Minimum) {
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Could also actually rename the functions to minimumNumber/maximumNumber.
Summary: These hacks can be removed now that llvm#83376 fixed the underlying problem.
Follow the 2019 rules and order -0 as less than +0 and +0 as greater than -0. As currently defined this isn't required for the intrinsics, but is a better QoI.
This will avoid the workaround in libc added by #83158