-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Fold `fmul X, -0.0` into `copysign(0.0, -X)`
#85772
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Yingwei Zheng (dtcxzyw) Changes
Full diff: https://github.com/llvm/llvm-project/pull/85772.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 9d4c271f990d19..9582084e23b8b1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -814,8 +814,17 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
if (match(Op1, m_SpecificFP(-1.0)))
return UnaryOperator::CreateFNegFMF(Op0, &I);
- // With no-nans: X * 0.0 --> copysign(0.0, X)
- if (I.hasNoNaNs() && match(Op1, m_PosZeroFP())) {
+ // With no-nans:
+ // X * 0.0 --> copysign(0.0, X)
+ // X * -0.0 --> copysign(0.0, -X)
+ if (match(Op1, m_AnyZeroFP()) &&
+ isKnownNeverNaN(&I, /*Depth=*/0, SQ.getWithInstruction(&I))) {
+ if (match(Op1, m_NegZeroFP())) {
+ Op0 = Builder.CreateFNegFMF(Op0, &I);
+ // Canonicalize -0.0 to 0.0
+ Op1 = ConstantFoldUnaryOpOperand(Instruction::FNeg, cast<Constant>(Op1),
+ DL);
+ }
CallInst *CopySign = Builder.CreateIntrinsic(Intrinsic::copysign,
{I.getType()}, {Op1, Op0}, &I);
return replaceInstUsesWith(I, CopySign);
diff --git a/llvm/test/Transforms/InstCombine/binop-itofp.ll b/llvm/test/Transforms/InstCombine/binop-itofp.ll
index c72e4ac413a3ed..4a78e0058e4917 100644
--- a/llvm/test/Transforms/InstCombine/binop-itofp.ll
+++ b/llvm/test/Transforms/InstCombine/binop-itofp.ll
@@ -1012,7 +1012,7 @@ define float @missed_nonzero_check_on_constant_for_si_fmul(i1 %c, i1 %.b, ptr %g
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i32 65529, i32 53264
; CHECK-NEXT: [[CONV_I:%.*]] = trunc i32 [[SEL]] to i16
; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp i16 [[CONV_I]] to float
-; CHECK-NEXT: [[MUL3_I_I:%.*]] = fmul float [[CONV1_I]], 0.000000e+00
+; CHECK-NEXT: [[MUL3_I_I:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[CONV1_I]])
; CHECK-NEXT: store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
; CHECK-NEXT: ret float [[MUL3_I_I]]
;
@@ -1031,7 +1031,7 @@ define <2 x float> @missed_nonzero_check_on_constant_for_si_fmul_vec(i1 %c, i1 %
; CHECK-NEXT: [[CONV_I_V:%.*]] = insertelement <2 x i16> poison, i16 [[CONV_I_S]], i64 0
; CHECK-NEXT: [[CONV_I:%.*]] = shufflevector <2 x i16> [[CONV_I_V]], <2 x i16> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp <2 x i16> [[CONV_I]] to <2 x float>
-; CHECK-NEXT: [[MUL3_I_I:%.*]] = fmul <2 x float> [[CONV1_I]], zeroinitializer
+; CHECK-NEXT: [[MUL3_I_I:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> zeroinitializer, <2 x float> [[CONV1_I]])
; CHECK-NEXT: store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
; CHECK-NEXT: ret <2 x float> [[MUL3_I_I]]
;
@@ -1050,7 +1050,8 @@ define float @negzero_check_on_constant_for_si_fmul(i1 %c, i1 %.b, ptr %g_2345)
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i32 65529, i32 53264
; CHECK-NEXT: [[CONV_I:%.*]] = trunc i32 [[SEL]] to i16
; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp i16 [[CONV_I]] to float
-; CHECK-NEXT: [[MUL3_I_I:%.*]] = fmul float [[CONV1_I]], -0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = fneg float [[CONV1_I]]
+; CHECK-NEXT: [[MUL3_I_I:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP1]])
; CHECK-NEXT: store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
; CHECK-NEXT: ret float [[MUL3_I_I]]
;
@@ -1069,7 +1070,7 @@ define <2 x float> @nonzero_check_on_constant_for_si_fmul_vec_w_undef(i1 %c, i1
; CHECK-NEXT: [[CONV_I_V:%.*]] = insertelement <2 x i16> poison, i16 [[CONV_I_S]], i64 0
; CHECK-NEXT: [[CONV_I:%.*]] = shufflevector <2 x i16> [[CONV_I_V]], <2 x i16> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp <2 x i16> [[CONV_I]] to <2 x float>
-; CHECK-NEXT: [[MUL3_I_I:%.*]] = fmul <2 x float> [[CONV1_I]], <float undef, float 0.000000e+00>
+; CHECK-NEXT: [[MUL3_I_I:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> <float undef, float 0.000000e+00>, <2 x float> [[CONV1_I]])
; CHECK-NEXT: store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
; CHECK-NEXT: ret <2 x float> [[MUL3_I_I]]
;
@@ -1111,7 +1112,8 @@ define <2 x float> @nonzero_check_on_constant_for_si_fmul_negz_vec_w_undef(i1 %c
; CHECK-NEXT: [[CONV_I_V:%.*]] = insertelement <2 x i16> poison, i16 [[CONV_I_S]], i64 0
; CHECK-NEXT: [[CONV_I:%.*]] = shufflevector <2 x i16> [[CONV_I_V]], <2 x i16> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp <2 x i16> [[CONV_I]] to <2 x float>
-; CHECK-NEXT: [[MUL3_I_I:%.*]] = fmul <2 x float> [[CONV1_I]], <float undef, float -0.000000e+00>
+; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[CONV1_I]]
+; CHECK-NEXT: [[MUL3_I_I:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> <float undef, float 0.000000e+00>, <2 x float> [[TMP1]])
; CHECK-NEXT: store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
; CHECK-NEXT: ret <2 x float> [[MUL3_I_I]]
;
diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll
index 7e7373e6ef5bdd..913837cb65eb48 100644
--- a/llvm/test/Transforms/InstCombine/fmul.ll
+++ b/llvm/test/Transforms/InstCombine/fmul.ll
@@ -1268,13 +1268,95 @@ define half @mul_zero(half %x) {
ret half %r
}
-; TODO: This could be fneg+copysign.
-
define half @mul_negzero_nnan(half %x) {
; CHECK-LABEL: @mul_negzero_nnan(
-; CHECK-NEXT: [[R:%.*]] = fmul nnan half [[X:%.*]], 0xH8000
+; CHECK-NEXT: [[TMP1:%.*]] = fneg nnan half [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = call nnan half @llvm.copysign.f16(half 0xH0000, half [[TMP1]])
; CHECK-NEXT: ret half [[R]]
;
%r = fmul nnan half %x, -0.0
ret half %r
}
+
+define float @mul_pos_zero_nnan_ninf(float nofpclass(inf nan) %a) {
+; CHECK-LABEL: @mul_pos_zero_nnan_ninf(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RET:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[A:%.*]])
+; CHECK-NEXT: ret float [[RET]]
+;
+entry:
+ %ret = fmul float %a, 0.000000e+00
+ ret float %ret
+}
+
+define float @mul_pos_zero_nnan(float nofpclass(nan) %a) {
+; CHECK-LABEL: @mul_pos_zero_nnan(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RET:%.*]] = fmul float [[A:%.*]], 0.000000e+00
+; CHECK-NEXT: ret float [[RET]]
+;
+entry:
+ %ret = fmul float %a, 0.000000e+00
+ ret float %ret
+}
+
+define float @mul_pos_zero_nnan_ninf_fmf(float nofpclass(nan) %a) {
+; CHECK-LABEL: @mul_pos_zero_nnan_ninf_fmf(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RET:%.*]] = fmul ninf float [[A:%.*]], 0.000000e+00
+; CHECK-NEXT: ret float [[RET]]
+;
+entry:
+ %ret = fmul ninf float %a, 0.000000e+00
+ ret float %ret
+}
+
+define float @mul_neg_zero_nnan_ninf(float nofpclass(inf nan) %a) {
+; CHECK-LABEL: @mul_neg_zero_nnan_ninf(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fneg float [[A:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP0]])
+; CHECK-NEXT: ret float [[RET]]
+;
+entry:
+ %ret = fmul float %a, -0.000000e+00
+ ret float %ret
+}
+
+define float @mul_neg_zero_nnan_fmf(float %a) {
+; CHECK-LABEL: @mul_neg_zero_nnan_fmf(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fneg nnan float [[A:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = call nnan float @llvm.copysign.f32(float 0.000000e+00, float [[TMP0]])
+; CHECK-NEXT: ret float [[RET]]
+;
+entry:
+ %ret = fmul nnan float %a, -0.000000e+00
+ ret float %ret
+}
+
+define float @mul_neg_zero_nnan_ninf_fmf(float nofpclass(inf nan) %a) {
+; CHECK-LABEL: @mul_neg_zero_nnan_ninf_fmf(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fneg nnan ninf float [[A:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = call nnan ninf float @llvm.copysign.f32(float 0.000000e+00, float [[TMP0]])
+; CHECK-NEXT: ret float [[RET]]
+;
+entry:
+ %ret = fmul nnan ninf float %a, -0.000000e+00
+ ret float %ret
+}
+
+; A poison element in the constant operand stays poison in the same lane of the folded copysign magnitude.
+
+define <2 x float> @mul_neg_zero_nnan_ninf_vec(<2 x float> nofpclass(inf nan) %a) {
+; CHECK-LABEL: @mul_neg_zero_nnan_ninf_vec(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fneg <2 x float> [[A:%.*]]
+; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> <float 0.000000e+00, float poison>, <2 x float> [[TMP0]])
+; CHECK-NEXT: ret <2 x float> [[RET]]
+;
+entry:
+ %ret = fmul <2 x float> %a, <float -0.0, float poison>
+ ret <2 x float> %ret
+}
diff --git a/llvm/test/Transforms/InstCombine/fpcast.ll b/llvm/test/Transforms/InstCombine/fpcast.ll
index 1f0bfbc40ac1b6..fee78ab315ea06 100644
--- a/llvm/test/Transforms/InstCombine/fpcast.ll
+++ b/llvm/test/Transforms/InstCombine/fpcast.ll
@@ -424,10 +424,7 @@ define i32 @fptosi_select(i1 %cond) {
define i32 @mul_pos_zero_convert(i32 %a) {
; CHECK-LABEL: @mul_pos_zero_convert(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[FP:%.*]] = sitofp i32 [[A:%.*]] to float
-; CHECK-NEXT: [[RET:%.*]] = fmul float [[FP]], 0.000000e+00
-; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[RET]] to i32
-; CHECK-NEXT: ret i32 [[CONV]]
+; CHECK-NEXT: ret i32 0
;
entry:
%fp = sitofp i32 %a to float
|
9466a62
to
fd81698
Compare
fd81698
to
f205f2c
Compare
isKnownNeverNaN(&I, /*Depth=*/0, SQ.getWithInstruction(&I)))) { | ||
if (FPC->isNegative()) | ||
Op0 = Builder.CreateFNegFMF(Op0, &I); | ||
Op1 = Constant::replaceUndefsWith( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
…85787) Alive2: https://alive2.llvm.org/ce/z/vFykcZ Address the comment #85772 (comment). Unfortunately, non-splat vector constants are not supported because we haven't implemented constant folding of fabs with vector operands.
95be94b
to
86417bf
Compare
…lvm#85787) Alive2: https://alive2.llvm.org/ce/z/vFykcZ Address the comment llvm#85772 (comment). Unfortunately, non-splat vector constants are not supported because we haven't implemented constant folding of fabs with vector operands.
`fneg + copysign` is better than fmul for analysis/codegen. godbolt: https://godbolt.org/z/eEs6dGd1G Alive2: https://alive2.llvm.org/ce/z/K3M5BA
`fneg + copysign` is better than fmul for analysis/codegen.
godbolt: https://godbolt.org/z/eEs6dGd1G
Alive2: https://alive2.llvm.org/ce/z/K3M5BA