-
Notifications
You must be signed in to change notification settings - Fork 15.4k
ValueTracking: Teach computeKnownFPClass that multiply can avoid denormals #171730
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ValueTracking: Teach computeKnownFPClass that multiply can avoid denormals #171730
Conversation
|
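@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-ir Author: Matt Arsenault (arsenm) Changes: Multiplying by a large constant can be used to scale denormal inputs into a normal range. This pattern appears frequently in math function library implementations to make use of hardware instructions that do not support denormals. We already handle this case for ldexp, but we now canonicalize ldexp by a constant to an fmul. The test cases are mostly the existing nofpclass tests for ldexp, run through the new instcombine to replace ldexp with fmul. Full diff: https://github.com/llvm/llvm-project/pull/171730.diff 3 Files Affected: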
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 9cb6f19b9340c..730b9a30c77ed 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -5646,6 +5646,24 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
KnownFPClass KnownLHS, KnownRHS;
computeKnownFPClass(Op->getOperand(1), DemandedElts, NeedForNan, KnownRHS,
Q, Depth + 1);
+
+ const APFloat *CRHS;
+ if (match(Op->getOperand(1), m_APFloat(CRHS))) {
+ // Match denormal scaling pattern, similar to the case in ldexp. If the
+ // constant's exponent is sufficiently large, the result cannot be
+ // subnormal.
+
+ // TODO: Should do general ConstantFPRange analysis.
+ const fltSemantics &Flt =
+ Op->getType()->getScalarType()->getFltSemantics();
+ unsigned Precision = APFloat::semanticsPrecision(Flt);
+ const int MantissaBits = Precision - 1;
+
+ int MinKnownExponent = ilogb(*CRHS);
+ if (MinKnownExponent >= MantissaBits)
+ Known.knownNot(fcSubnormal);
+ }
+
if (!KnownRHS.isKnownNeverNaN())
break;
diff --git a/llvm/test/Transforms/Attributor/nofpclass-fmul.ll b/llvm/test/Transforms/Attributor/nofpclass-fmul.ll
index 6d0edf0681ed4..0ad34105d4cb6 100644
--- a/llvm/test/Transforms/Attributor/nofpclass-fmul.ll
+++ b/llvm/test/Transforms/Attributor/nofpclass-fmul.ll
@@ -35,7 +35,7 @@ define float @ret_mul_exponent_f32_22(float %arg0) {
}
define float @ret_mul_exponent_f32_23(float %arg0) {
-; CHECK-LABEL: define float @ret_mul_exponent_f32_23(
+; CHECK-LABEL: define nofpclass(sub) float @ret_mul_exponent_f32_23(
; CHECK-SAME: float [[ARG0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[CALL:%.*]] = fmul float [[ARG0]], 0x4160000000000000
; CHECK-NEXT: ret float [[CALL]]
@@ -45,7 +45,7 @@ define float @ret_mul_exponent_f32_23(float %arg0) {
}
define float @ret_mul_exponent_f32_24(float %arg0) {
-; CHECK-LABEL: define float @ret_mul_exponent_f32_24(
+; CHECK-LABEL: define nofpclass(sub) float @ret_mul_exponent_f32_24(
; CHECK-SAME: float [[ARG0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[CALL:%.*]] = fmul float [[ARG0]], 0x4170000000000000
; CHECK-NEXT: ret float [[CALL]]
@@ -55,7 +55,7 @@ define float @ret_mul_exponent_f32_24(float %arg0) {
}
define float @ret_mul_exponent_f32_23_nnan(float nofpclass(nan) %arg0) {
-; CHECK-LABEL: define nofpclass(nan) float @ret_mul_exponent_f32_23_nnan(
+; CHECK-LABEL: define nofpclass(nan sub) float @ret_mul_exponent_f32_23_nnan(
; CHECK-SAME: float nofpclass(nan) [[ARG0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[CALL:%.*]] = fmul float [[ARG0]], 0x4160000000000000
; CHECK-NEXT: ret float [[CALL]]
@@ -85,7 +85,7 @@ define double @ret_mul_exponent_f64_51(double %arg0) {
}
define double @ret_mul_exponent_f64_52(double %arg0) {
-; CHECK-LABEL: define double @ret_mul_exponent_f64_52(
+; CHECK-LABEL: define nofpclass(sub) double @ret_mul_exponent_f64_52(
; CHECK-SAME: double [[ARG0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[CALL:%.*]] = fmul double [[ARG0]], 0x4330000000000000
; CHECK-NEXT: ret double [[CALL]]
@@ -95,7 +95,7 @@ define double @ret_mul_exponent_f64_52(double %arg0) {
}
define double @ret_mul_exponent_f64_53(double %arg0) {
-; CHECK-LABEL: define double @ret_mul_exponent_f64_53(
+; CHECK-LABEL: define nofpclass(sub) double @ret_mul_exponent_f64_53(
; CHECK-SAME: double [[ARG0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[CALL:%.*]] = fmul double [[ARG0]], 0x4340000000000000
; CHECK-NEXT: ret double [[CALL]]
@@ -125,7 +125,7 @@ define half @ret_mul_exponent_f16_9(half %arg0) {
}
define half @ret_mul_exponent_f16_10(half %arg0) {
-; CHECK-LABEL: define half @ret_mul_exponent_f16_10(
+; CHECK-LABEL: define nofpclass(sub) half @ret_mul_exponent_f16_10(
; CHECK-SAME: half [[ARG0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[CALL:%.*]] = fmul half [[ARG0]], 0xH6400
; CHECK-NEXT: ret half [[CALL]]
@@ -145,7 +145,7 @@ define bfloat @ret_mul_exponent_bf16_6(bfloat %arg0) {
}
define bfloat @ret_mul_exponent_bf16_7(bfloat %arg0) {
-; CHECK-LABEL: define bfloat @ret_mul_exponent_bf16_7(
+; CHECK-LABEL: define nofpclass(sub) bfloat @ret_mul_exponent_bf16_7(
; CHECK-SAME: bfloat [[ARG0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[CALL:%.*]] = fmul bfloat [[ARG0]], 0xR4300
; CHECK-NEXT: ret bfloat [[CALL]]
@@ -155,7 +155,7 @@ define bfloat @ret_mul_exponent_bf16_7(bfloat %arg0) {
}
define bfloat @ret_mul_exponent_bf16_8(bfloat %arg0) {
-; CHECK-LABEL: define bfloat @ret_mul_exponent_bf16_8(
+; CHECK-LABEL: define nofpclass(sub) bfloat @ret_mul_exponent_bf16_8(
; CHECK-SAME: bfloat [[ARG0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[CALL:%.*]] = fmul bfloat [[ARG0]], 0xR4380
; CHECK-NEXT: ret bfloat [[CALL]]
@@ -215,7 +215,7 @@ define float @ret_mul_exponent_f32_neg127(float %arg0) {
}
define <2 x float> @ret_mul_exponent_v2f32_splat_23(<2 x float> %arg0) {
-; CHECK-LABEL: define <2 x float> @ret_mul_exponent_v2f32_splat_23(
+; CHECK-LABEL: define nofpclass(sub) <2 x float> @ret_mul_exponent_v2f32_splat_23(
; CHECK-SAME: <2 x float> [[ARG0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[CALL:%.*]] = fmul <2 x float> [[ARG0]], splat (float 0x4160000000000000)
; CHECK-NEXT: ret <2 x float> [[CALL]]
@@ -267,7 +267,7 @@ define float @ret_mul_f32_0(float %arg0) {
}
define float @ret_mul_f32_inf(float %arg0) {
-; CHECK-LABEL: define float @ret_mul_f32_inf(
+; CHECK-LABEL: define nofpclass(sub) float @ret_mul_f32_inf(
; CHECK-SAME: float [[ARG0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[CALL:%.*]] = fmul float [[ARG0]], 0x7FF0000000000000
; CHECK-NEXT: ret float [[CALL]]
diff --git a/llvm/test/Transforms/Attributor/nofpclass-nan-fmul.ll b/llvm/test/Transforms/Attributor/nofpclass-nan-fmul.ll
index fbf6c2e0981fb..f6bb1354cb74f 100644
--- a/llvm/test/Transforms/Attributor/nofpclass-nan-fmul.ll
+++ b/llvm/test/Transforms/Attributor/nofpclass-nan-fmul.ll
@@ -195,7 +195,7 @@ define float @ret_fmul_square_nnan_nzero(float nofpclass(nan zero) %arg) #0 {
}
define float @ret_fmul_ieee_inf(float %arg) {
-; CHECK-LABEL: define float @ret_fmul_ieee_inf
+; CHECK-LABEL: define nofpclass(sub) float @ret_fmul_ieee_inf
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: [[FMUL:%.*]] = fmul float [[ARG]], 0x7FF0000000000000
; CHECK-NEXT: ret float [[FMUL]]
|
andykaylor
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice!
fed7047 to
b552c70
Compare
89db664 to
ef8a54e
Compare
ValueTracking: Teach computeKnownFPClass that multiply can avoid denormals. Multiplying by a large constant can be used to scale denormal inputs into a normal range. This pattern appears frequently in math function library implementations to make use of hardware instructions that do not support denormals. We already handle this case for ldexp, but we now canonicalize ldexp by a constant to an fmul. The test cases are mostly the existing nofpclass tests for ldexp, run through the new instcombine to replace ldexp with fmul.
b552c70 to
892f156
Compare

Multiplying by a large constant can be used to scale denormal inputs into
a normal range. This pattern appears frequently in math function library
implementations to make use of hardware instructions that do not support
denormals. We already handle this case for ldexp, but we now canonicalize
ldexp by a constant to an fmul.
The test cases are mostly the existing nofpclass tests for ldexp,
run through the new instcombine to replace ldexp with fmul.