-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Set zero_is_poison for ctlz/cttz if they are only used as shift amounts #85035
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Yingwei Zheng (dtcxzyw) Changes — Alive2: https://alive2.llvm.org/ce/z/r-67t9 It would improve the codegen if the target doesn't provide a defined value for ctlz/cttz with zero. I found this while investigating #84763. Full diff: https://github.com/llvm/llvm-project/pull/85035.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f5f3716d390d77..85e0fed1c93478 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1948,11 +1948,20 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
break;
case Intrinsic::cttz:
- case Intrinsic::ctlz:
+ case Intrinsic::ctlz: {
+ // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
+ bool Changed = false;
+ if (II->hasOneUse() && match(II->getArgOperand(1), m_Zero()) &&
+ match(II->user_back(), m_Shift(m_Value(), m_Specific(II)))) {
+ replaceOperand(*II, 1, Builder.getTrue());
+ Changed = true;
+ }
if (auto *I = foldCttzCtlz(*II, *this))
return I;
+ if (Changed)
+ return II;
break;
-
+ }
case Intrinsic::ctpop:
if (auto *I = foldCtpop(*II, *this))
return I;
diff --git a/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll b/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll
new file mode 100644
index 00000000000000..2b2f820c9a0956
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i32 @shl_cttz_false(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @shl_cttz_false(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[Y]], i1 true), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT: [[RES:%.*]] = shl i32 [[X]], [[CTTZ]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+entry:
+ %cttz = call i32 @llvm.cttz.i32(i32 %y, i1 false)
+ %res = shl i32 %x, %cttz
+ ret i32 %res
+}
+
+define i32 @shl_ctlz_false(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @shl_ctlz_false(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CTTZ:%.*]] = call i32 @llvm.ctlz.i32(i32 [[Y]], i1 true), !range [[RNG0]]
+; CHECK-NEXT: [[RES:%.*]] = shl i32 [[X]], [[CTTZ]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+entry:
+ %cttz = call i32 @llvm.ctlz.i32(i32 %y, i1 false)
+ %res = shl i32 %x, %cttz
+ ret i32 %res
+}
+
+define i32 @lshr_cttz_false(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @lshr_cttz_false(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[Y]], i1 true), !range [[RNG0]]
+; CHECK-NEXT: [[RES:%.*]] = lshr i32 [[X]], [[CTTZ]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+entry:
+ %cttz = call i32 @llvm.cttz.i32(i32 %y, i1 false)
+ %res = lshr i32 %x, %cttz
+ ret i32 %res
+}
+
+define i32 @ashr_cttz_false(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @ashr_cttz_false(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[Y]], i1 true), !range [[RNG0]]
+; CHECK-NEXT: [[RES:%.*]] = ashr i32 [[X]], [[CTTZ]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+entry:
+ %cttz = call i32 @llvm.cttz.i32(i32 %y, i1 false)
+ %res = ashr i32 %x, %cttz
+ ret i32 %res
+}
+
+define i32 @shl_cttz_false_multiuse(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @shl_cttz_false_multiuse(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[Y]], i1 false), !range [[RNG0]]
+; CHECK-NEXT: call void @use(i32 [[CTTZ]])
+; CHECK-NEXT: [[RES:%.*]] = shl i32 [[X]], [[CTTZ]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+entry:
+ %cttz = call i32 @llvm.cttz.i32(i32 %y, i1 false)
+ call void @use(i32 %cttz)
+ %res = shl i32 %x, %cttz
+ ret i32 %res
+}
+
+define i32 @shl_cttz_as_lhs(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @shl_cttz_as_lhs(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[Y]], i1 false), !range [[RNG0]]
+; CHECK-NEXT: [[RES:%.*]] = shl i32 [[CTTZ]], [[X]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+entry:
+ %cttz = call i32 @llvm.cttz.i32(i32 %y, i1 false)
+ %res = shl i32 %cttz, %x
+ ret i32 %res
+}
+
+declare void @use(i32)
+;.
+; CHECK: [[RNG0]] = !{i32 0, i32 33}
+;.
|
580e311
to
6b62484
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Alive2: https://alive2.llvm.org/ce/z/r-67t9
This improves codegen on targets that don't provide a defined result for ctlz/cttz of a zero input.
I found this while investigating #84763.