-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Canonicalize (sitofp x)
-> (uitofp x)
if x >= 0
#82404
Conversation
(uitofp x)
-> (sitofp x)
if x >= 0
@llvm/pr-subscribers-clang @llvm/pr-subscribers-llvm-transforms Author: None (goldsteinn) Changes
Full diff: https://github.com/llvm/llvm-project/pull/82404.diff 5 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index ed47de287302ed..c22f1d8561ebbb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1929,7 +1929,11 @@ Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
}
Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
- return commonCastTransforms(CI);
+ if (Instruction *R = commonCastTransforms(CI))
+ return R;
+ if (isKnownNonNegative(CI.getOperand(0), SQ))
+ return new SIToFPInst(CI.getOperand(0), CI.getType());
+ return nullptr;
}
Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
diff --git a/llvm/test/Transforms/InstCombine/add-sitofp.ll b/llvm/test/Transforms/InstCombine/add-sitofp.ll
index db44b806593b64..0b25f7acb085ba 100644
--- a/llvm/test/Transforms/InstCombine/add-sitofp.ll
+++ b/llvm/test/Transforms/InstCombine/add-sitofp.ll
@@ -63,6 +63,25 @@ define double @test_2(i32 %a, i32 %b) {
ret double %res
}
+define double @test_2_uitofp(i32 %a, i32 %b) {
+; CHECK-LABEL: @test_2_uitofp(
+; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823
+; CHECK-NEXT: [[B_AND:%.*]] = and i32 [[B:%.*]], 1073741823
+; CHECK-NEXT: [[ADDCONV:%.*]] = add nuw nsw i32 [[A_AND]], [[B_AND]]
+; CHECK-NEXT: [[RES:%.*]] = sitofp i32 [[ADDCONV]] to double
+; CHECK-NEXT: ret double [[RES]]
+;
+ ; Drop two highest bits to guarantee that %a + %b doesn't overflow
+ %a_and = and i32 %a, 1073741823
+ %b_and = and i32 %b, 1073741823
+
+ %a_and_fp = uitofp i32 %a_and to double
+ %b_and_fp = uitofp i32 %b_and to double
+
+ %res = fadd double %a_and_fp, %b_and_fp
+ ret double %res
+}
+
define float @test_2_neg(i32 %a, i32 %b) {
; CHECK-LABEL: @test_2_neg(
; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823
diff --git a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
index fad1176cc18fac..d997ab2e24bc5b 100644
--- a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
+++ b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
@@ -472,7 +472,7 @@ define float @ui32_clamp_and_cast_to_float(i32 %x) {
; CHECK-LABEL: @ui32_clamp_and_cast_to_float(
; CHECK-NEXT: [[LO_CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
; CHECK-NEXT: [[MIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[X]], i32 255)
-; CHECK-NEXT: [[MIN:%.*]] = uitofp i32 [[MIN1]] to float
+; CHECK-NEXT: [[MIN:%.*]] = sitofp i32 [[MIN1]] to float
; CHECK-NEXT: [[R:%.*]] = select i1 [[LO_CMP]], float 1.000000e+00, float [[MIN]]
; CHECK-NEXT: ret float [[R]]
;
@@ -488,7 +488,7 @@ define float @ui64_clamp_and_cast_to_float(i64 %x) {
; CHECK-LABEL: @ui64_clamp_and_cast_to_float(
; CHECK-NEXT: [[LO_CMP:%.*]] = icmp eq i64 [[X:%.*]], 0
; CHECK-NEXT: [[MIN1:%.*]] = call i64 @llvm.umin.i64(i64 [[X]], i64 255)
-; CHECK-NEXT: [[MIN:%.*]] = uitofp i64 [[MIN1]] to float
+; CHECK-NEXT: [[MIN:%.*]] = sitofp i64 [[MIN1]] to float
; CHECK-NEXT: [[R:%.*]] = select i1 [[LO_CMP]], float 1.000000e+00, float [[MIN]]
; CHECK-NEXT: ret float [[R]]
;
diff --git a/llvm/test/Transforms/InstCombine/fpcast.ll b/llvm/test/Transforms/InstCombine/fpcast.ll
index 3e5c6fd20b12da..ca47f068b4860f 100644
--- a/llvm/test/Transforms/InstCombine/fpcast.ll
+++ b/llvm/test/Transforms/InstCombine/fpcast.ll
@@ -266,7 +266,7 @@ define half @uint_to_fptrunc(i32 %x) {
define half @masked_uint_to_fptrunc1(i32 %x) {
; CHECK-LABEL: @masked_uint_to_fptrunc1(
; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 16777215
-; CHECK-NEXT: [[R:%.*]] = uitofp i32 [[M]] to half
+; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[M]] to half
; CHECK-NEXT: ret half [[R]]
;
%m = and i32 %x, 16777215
@@ -278,7 +278,7 @@ define half @masked_uint_to_fptrunc1(i32 %x) {
define half @masked_uint_to_fptrunc2(i32 %x) {
; CHECK-LABEL: @masked_uint_to_fptrunc2(
; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 8
-; CHECK-NEXT: [[R:%.*]] = uitofp i32 [[M]] to half
+; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[M]] to half
; CHECK-NEXT: ret half [[R]]
;
%m = lshr i32 %x, 8
@@ -290,7 +290,7 @@ define half @masked_uint_to_fptrunc2(i32 %x) {
define half @masked_uint_to_fptrunc3(i32 %x) {
; CHECK-LABEL: @masked_uint_to_fptrunc3(
; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 7
-; CHECK-NEXT: [[F:%.*]] = uitofp i32 [[M]] to float
+; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[M]] to float
; CHECK-NEXT: [[R:%.*]] = fptrunc float [[F]] to half
; CHECK-NEXT: ret half [[R]]
;
@@ -314,7 +314,7 @@ define double @uint_to_fpext(i32 %x) {
define double @masked_uint_to_fpext1(i32 %x) {
; CHECK-LABEL: @masked_uint_to_fpext1(
; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 16777215
-; CHECK-NEXT: [[R:%.*]] = uitofp i32 [[M]] to double
+; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[M]] to double
; CHECK-NEXT: ret double [[R]]
;
%m = and i32 %x, 16777215
@@ -326,7 +326,7 @@ define double @masked_uint_to_fpext1(i32 %x) {
define double @masked_uint_to_fpext2(i32 %x) {
; CHECK-LABEL: @masked_uint_to_fpext2(
; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 8
-; CHECK-NEXT: [[R:%.*]] = uitofp i32 [[M]] to double
+; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[M]] to double
; CHECK-NEXT: ret double [[R]]
;
%m = lshr i32 %x, 8
@@ -338,7 +338,7 @@ define double @masked_uint_to_fpext2(i32 %x) {
define double @masked_uint_to_fpext3(i32 %x) {
; CHECK-LABEL: @masked_uint_to_fpext3(
; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 7
-; CHECK-NEXT: [[F:%.*]] = uitofp i32 [[M]] to float
+; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[M]] to float
; CHECK-NEXT: [[R:%.*]] = fpext float [[F]] to double
; CHECK-NEXT: ret double [[R]]
;
diff --git a/llvm/test/Transforms/InstCombine/sitofp.ll b/llvm/test/Transforms/InstCombine/sitofp.ll
index 5e0cf944880071..086323624a2073 100644
--- a/llvm/test/Transforms/InstCombine/sitofp.ll
+++ b/llvm/test/Transforms/InstCombine/sitofp.ll
@@ -256,7 +256,7 @@ define i25 @consider_lowbits_masked_input(i25 %A) {
define i32 @overflow_masked_input(i32 %A) {
; CHECK-LABEL: @overflow_masked_input(
; CHECK-NEXT: [[M:%.*]] = and i32 [[A:%.*]], 16777217
-; CHECK-NEXT: [[B:%.*]] = uitofp i32 [[M]] to float
+; CHECK-NEXT: [[B:%.*]] = sitofp i32 [[M]] to float
; CHECK-NEXT: [[C:%.*]] = fptoui float [[B]] to i32
; CHECK-NEXT: ret i32 [[C]]
;
|
My thinking was |
Why is it a better canonicalization? We prefer unsigned over signed operations for everything else (zext, lshr, udiv, icmp u), so this would be very unusual. |
There seems to be a lot more support for looking through Edit: The little test I added is an example. Also fp types are pretty inherently signed, so I think signed -> signed is more intuitive/direct. |
Is there some inherent reason why that transform can only work with sitofp, rather than also uitofp with an s/signed/unsigned replacement everywhere? If there isn't, we should extend that transform instead. |
One more thing, some backends (x86 for example) have better codegen with and it's not too uncommon for info from the middle-end to be lost on the way to the backend. Can be fixed up elsewhere (i.e. during lowering) but I think generally it's indicative of the somewhat built-in "signedness" of floats and thus how If you're not convinced lmk and I'll swap this patch around and start work on the regressions. |
An example extracted from |
I'm working on a patch to expand that fold. |
I think this says a lot more about the x86 architecture than about floating point numbers :) Pre-avx512 is well known to have very weird holes in the simd instruction set -- you could equally say that integers have an inherent preference for signed comparison because x86 can't compare packed unsigned numbers...
I think we should first address any unnecessary differences between sitofp and uitofp optimization, and then we can check the result of both canonicalization directions on @dtcxzyw's test set and see whether we see any clear benefit in one direction or the other. |
I think there's an asymmetry somewhere in PowerPC too. But it's for the other direction fptosi/fptoui. See this comment from
|
See #82555, |
61808ef
to
4a2a0aa
Compare
got #82555 in, now canonicalizes to unsigned. |
(uitofp x)
-> (sitofp x)
if x >= 0
(sitofp x)
-> (uitofp x)
if x >= 0
@dtcxzyw Can you please test the new version? |
Done. |
Regression:
|
Is that a regression? |
dtcxzyw/llvm-opt-benchmark#248 (comment) |
Ah I see, I thought you meant the other way around. Okay, I think what needs to change is we need to be able to handle |
4a2a0aa
to
cb0ccc6
Compare
@dtcxzyw can you re-run? |
Heads up: we noticed at google that this is causing the following test to fail: https://github.com/google/swiftshader/blob/master/tests/ReactorUnitTests/ReactorUnitTests.cpp#L1312 I need to put a more proper reproducer, but thought that at least posting the heads-up might be faster to unblock us and maybe the problem is clear. I see that adding a flag vs reverting is already brought up in the last comment, what's the plan for that ? |
Ill revert this. I'll re-post if I get around to adding a flag. |
Reverted with: 6960ace If you can get the repro though, that would still be useful for when I revisit. |
Apart from the correctness issues, we've seen some regressions on various benchmarks from LLVM Test Suite after this patch. Specifically, around 3-5% regression on x86-64 in various metrics of the Interpolation benchmarks, and up to 30% regression on a number of floating point-centric benchmarks from https://github.com/llvm/llvm-test-suite/tree/main/SingleSource/Benchmarks/Misc (flops-4.c, flops-5.c, flops-6.c, flops-8.c, fp-convert.c). The numbers vary depending on the microarchitecture, with Skylake being less affected (on the order of ~10%) and AMD Rome showing larger regressions (up to 30%). |
Thank you for the info, well it's reverted now so nothing to do. Although that does motivate me to get |
Well, I'm not sure how proper that would be as a reproducer, I extracted the mentioned test to a program: |
Before the change the program exits with 0, after it exits with 10. I attached the generated IR before and after the change. It's only one line diff: |
generation command is I'm trying to reduce the preprocessed file, though not sure I'll keep the semantics of the failure. |
This should be sufficient, thank you! |
FYI this patch saves ~3% instructions for some benchmarks from LLVM-test-suite on RISC-V. |
Are you able to extract a reproducer that I can look at? |
See: #86141 which adds |
As noted when llvm#82404 was pushed (canonicalizing `sitofp` -> `uitofp`), different signedness on fp casts can have dramatic performance implications on different backends. So, it makes sense to create a reliable means for the backend to pick its cast signedness if either is correct. Further, this allows us to start canonicalizing `sitofp` -> `uitofp`, which may ease middle-end analysis.
Might have been related to: 1283646 |
This is essentially the same as llvm#82404 but has the `nneg` flag which allows the backend to reliably undo the transform.
Doesn't that program call |
As noted when llvm#82404 was pushed (canonicalizing `sitofp` -> `uitofp`), different signedness on fp casts can have dramatic performance implications on different backends. So, it makes sense to create a reliable means for the backend to pick its cast signedness if either is correct. Further, this allows us to start canonicalizing `sitofp` -> `uitofp`, which may ease middle-end analysis.
As noted when llvm#82404 was pushed (canonicalizing `sitofp` -> `uitofp`), different signedness on fp casts can have dramatic performance implications on different backends. So, it makes sense to create a reliable means for the backend to pick its cast signedness if either is correct. Further, this allows us to start canonicalizing `sitofp` -> `uitofp`, which may ease middle-end analysis.
As noted when llvm#82404 was pushed (canonicalizing `sitofp` -> `uitofp`), different signedness on fp casts can have dramatic performance implications on different backends. So, it makes sense to create a reliable means for the backend to pick its cast signedness if either is correct. Further, this allows us to start canonicalizing `sitofp` -> `uitofp`, which may ease middle-end analysis.
As noted when llvm#82404 was pushed (canonicalizing `sitofp` -> `uitofp`), different signedness on fp casts can have dramatic performance implications on different backends. So, it makes sense to create a reliable means for the backend to pick its cast signedness if either is correct. Further, this allows us to start canonicalizing `sitofp` -> `uitofp`, which may ease middle-end analysis.
As noted when llvm#82404 was pushed (canonicalizing `sitofp` -> `uitofp`), different signedness on fp casts can have dramatic performance implications on different backends. So, it makes sense to create a reliable means for the backend to pick its cast signedness if either is correct. Further, this allows us to start canonicalizing `sitofp` -> `uitofp`, which may ease middle-end analysis.
As noted when llvm#82404 was pushed (canonicalizing `sitofp` -> `uitofp`), different signedness on fp casts can have dramatic performance implications on different backends. So, it makes sense to create a reliable means for the backend to pick its cast signedness if either is correct. Further, this allows us to start canonicalizing `sitofp` -> `uitofp`, which may ease middle-end analysis.
As noted when llvm#82404 was pushed (canonicalizing `sitofp` -> `uitofp`), different signedness on fp casts can have dramatic performance implications on different backends. So, it makes sense to create a reliable means for the backend to pick its cast signedness if either is correct. Further, this allows us to start canonicalizing `sitofp` -> `uitofp`, which may ease middle-end analysis.
As noted when #82404 was pushed (canonicalizing `sitofp` -> `uitofp`), different signedness on fp casts can have dramatic performance implications on different backends. So, it makes sense to create a reliable means for the backend to pick its cast signedness if either is correct. Further, this allows us to start canonicalizing `sitofp` -> `uitofp`, which may ease middle-end analysis. Closes #86141
This is essentially the same as llvm#82404 but has the `nneg` flag which allows the backend to reliably undo the transform.
This is essentially the same as llvm#82404 but has the `nneg` flag which allows the backend to reliably undo the transform.
Just a standard canonicalization.
Proofs: https://alive2.llvm.org/ce/z/9W4VFm