-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[BDCE] Handle multi-use and
/or
/xor
on demanded bits
#79688
[BDCE] Handle multi-use and
/or
/xor
on demanded bits
#79688
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Antonio Frighetto (antoniofrighetto) ChangesSimplify multi-use Fixes: #78596. Proofs: https://alive2.llvm.org/ce/z/BBdrCz. Full diff: https://github.com/llvm/llvm-project/pull/79688.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/BDCE.cpp b/llvm/lib/Transforms/Scalar/BDCE.cpp
index 1fa2c75b0f42ac1..178b5a0da469cdd 100644
--- a/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -125,6 +125,33 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
}
}
+ // Simplify select of ands when the mask does not affect the demanded bits.
+ if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
+ APInt Demanded = DB.getDemandedBits(SI);
+ if (!Demanded.isAllOnes()) {
+ for (Use &U : I.operands()) {
+ auto *And = dyn_cast<BinaryOperator>(&U);
+ if (!And || !And->hasOneUse() || And->getOpcode() != Instruction::And)
+ continue;
+
+ auto *CV = dyn_cast<ConstantInt>(And->getOperand(1));
+ if (!CV)
+ continue;
+
+ APInt Mask = CV->getValue();
+ if ((Mask | ~Demanded).isAllOnes()) {
+ clearAssumptionsOfUsers(And, DB);
+ U.set(And->getOperand(0));
+ Worklist.push_back(And);
+ Changed = true;
+ }
+ }
+ }
+
+ if (Changed)
+ continue;
+ }
+
for (Use &U : I.operands()) {
// DemandedBits only detects dead integer uses.
if (!U->getType()->isIntOrIntVectorTy())
diff --git a/llvm/test/Transforms/BDCE/select-multiuse.ll b/llvm/test/Transforms/BDCE/select-multiuse.ll
new file mode 100644
index 000000000000000..8ad91cc7458d114
--- /dev/null
+++ b/llvm/test/Transforms/BDCE/select-multiuse.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=bdce < %s | FileCheck %s
+
+define void @select(i1 %c, i64 %a, i64 %b) {
+; CHECK-LABEL: define void @select(
+; CHECK-SAME: i1 [[C:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i64 [[A]], i64 [[B]]
+; CHECK-NEXT: [[RET1:%.*]] = and i64 [[S]], 8
+; CHECK-NEXT: [[RET2:%.*]] = and i64 [[S]], 16
+; CHECK-NEXT: call void @use(i64 [[RET1]])
+; CHECK-NEXT: call void @use(i64 [[RET2]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %and1 = and i64 %a, 24 ; Mask: 0001 1000
+ %and2 = and i64 %b, 25 ; Mask: 0001 1001
+ %s = select i1 %c, i64 %and1, i64 %and2
+ %ret1 = and i64 %s, 8 ; Demanded bits: 0000 1000
+ %ret2 = and i64 %s, 16 ; Demanded bits: 0001 0000
+ call void @use(i64 %ret1)
+ call void @use(i64 %ret2)
+ ret void
+}
+
+define void @select_2(i1 %c, i64 %a, i64 %b) {
+; CHECK-LABEL: define void @select_2(
+; CHECK-SAME: i1 [[C:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[AND2:%.*]] = and i64 [[B]], 23
+; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i64 [[A]], i64 [[AND2]]
+; CHECK-NEXT: [[RET1:%.*]] = and i64 [[S]], 8
+; CHECK-NEXT: [[RET2:%.*]] = and i64 [[S]], 16
+; CHECK-NEXT: call void @use(i64 [[RET1]])
+; CHECK-NEXT: call void @use(i64 [[RET2]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %and1 = and i64 %a, 25 ; Mask: 0001 1001
+ %and2 = and i64 %b, 23 ; Mask: 0001 0111
+ %s = select i1 %c, i64 %and1, i64 %and2
+ %ret1 = and i64 %s, 8 ; Demanded bits: 0000 1000
+ %ret2 = and i64 %s, 16 ; Demanded bits: 0001 0000
+ call void @use(i64 %ret1)
+ call void @use(i64 %ret2)
+ ret void
+}
+
+define void @select_different_demanded(i1 %c, i64 %a, i64 %b) {
+; CHECK-LABEL: define void @select_different_demanded(
+; CHECK-SAME: i1 [[C:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[AND1:%.*]] = and i64 0, 24
+; CHECK-NEXT: [[AND2:%.*]] = and i64 [[B]], 25
+; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i64 [[AND1]], i64 [[AND2]]
+; CHECK-NEXT: [[RET1:%.*]] = and i64 [[S]], 3
+; CHECK-NEXT: [[RET2:%.*]] = and i64 [[S]], 7
+; CHECK-NEXT: call void @use(i64 [[RET1]])
+; CHECK-NEXT: call void @use(i64 [[RET2]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %and1 = and i64 %a, 24 ; Mask: 0001 1000
+ %and2 = and i64 %b, 25 ; Mask: 0001 1001
+ %s = select i1 %c, i64 %and1, i64 %and2
+ %ret1 = and i64 %s, 3 ; Demanded bits: 0000 0011
+ %ret2 = and i64 %s, 7 ; Demanded bits: 0000 0111
+ call void @use(i64 %ret1)
+ call void @use(i64 %ret2)
+ ret void
+}
+
+define void @and_multiuse(i1 %c, i64 %a, i64 %b) {
+; CHECK-LABEL: define void @and_multiuse(
+; CHECK-SAME: i1 [[C:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[AND2:%.*]] = and i64 [[B]], 25
+; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i64 [[A]], i64 [[AND2]]
+; CHECK-NEXT: [[RET1:%.*]] = and i64 [[S]], 8
+; CHECK-NEXT: [[RET2:%.*]] = and i64 [[S]], 16
+; CHECK-NEXT: call void @use(i64 [[RET1]])
+; CHECK-NEXT: call void @use2(i64 [[RET2]], i64 [[AND2]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %and1 = and i64 %a, 24
+ %and2 = and i64 %b, 25
+ %s = select i1 %c, i64 %and1, i64 %and2
+ %ret1 = and i64 %s, 8
+ %ret2 = and i64 %s, 16
+ call void @use(i64 %ret1)
+ call void @use2(i64 %ret2, i64 %and2)
+ ret void
+}
+
+define void @select_vectorized(i1 %c, <2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: define void @select_vectorized(
+; CHECK-SAME: i1 [[C:%.*]], <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i8> [[A]], <i8 28, i8 28>
+; CHECK-NEXT: [[AND2:%.*]] = and <2 x i8> [[B]], <i8 29, i8 29>
+; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], <2 x i8> [[AND1]], <2 x i8> [[AND2]]
+; CHECK-NEXT: [[RET1:%.*]] = and <2 x i8> [[S]], <i8 4, i8 4>
+; CHECK-NEXT: [[RET2:%.*]] = and <2 x i8> [[S]], <i8 12, i8 12>
+; CHECK-NEXT: call void @use3(<2 x i8> [[RET1]])
+; CHECK-NEXT: call void @use3(<2 x i8> [[RET2]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %and1 = and <2 x i8> %a, <i8 28, i8 28>
+ %and2 = and <2 x i8> %b, <i8 29, i8 29>
+ %s = select i1 %c, <2 x i8> %and1, <2 x i8> %and2
+ %ret1 = and <2 x i8> %s, <i8 4, i8 4>
+ %ret2 = and <2 x i8> %s, <i8 12, i8 12>
+ call void @use3(<2 x i8> %ret1)
+ call void @use3(<2 x i8> %ret2)
+ ret void
+}
+
+declare void @use(i64)
+declare void @use2(i64, i64)
+declare void @use3(<2 x i8>)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
...wat? This should not be looking at selects. You should only be looking at the demanded bits of the and.
Could you kindly provide an example showing how not restricting this to the
You mean we should only be looking at the demanded bits of the instruction itself when the operand analyzed is the |
You can replace the select with pretty much any instruction that transfers demanded bits, e.g. https://alive2.llvm.org/ce/z/MF1Ry3.
I would expect you to query the demanded bits of the |
For a single BDCE pass, I think a minimal testcase can be like: define void @src(i64 %a) {
entry:
%and = and i64 %a, 24
%ret1 = and i64 %and, 8
%ret2 = and i64 %and, 16
call void @use(i64 %ret1)
call void @use(i64 %ret2)
ret void
}
define void @tgt(i64 %a) {
entry:
%ret1 = and i64 %a, 8
%ret2 = and i64 %a, 16
call void @use(i64 %ret1)
call void @use(i64 %ret2)
ret void
} |
Reviewed, thanks, somehow I thought it would be easier to handle the |
ac76f66
to
c862fbb
Compare
select
of and
s on demanded bitsand
/or
on demanded bits
c862fbb
to
524bf49
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, this looks a lot more like what I had in mind.
entry: | ||
%and1 = and i64 %a, 24 ; Mask: 0001 1000 | ||
%and2 = and i64 %b, 25 ; Mask: 0001 1001 | ||
%or = or i64 %and1, %and2 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please add a test variant with or disjoint
, to show that the flag gets dropped correctly?
@@ -9,8 +9,7 @@ declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) | |||
; First fshr operand is dead. | |||
define i32 @pr39771_fshr_multi_use_instr(i32 %a) { | |||
; CHECK-LABEL: @pr39771_fshr_multi_use_instr( | |||
; CHECK-NEXT: [[X:%.*]] = or i32 [[A:%.*]], 0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you change the or
operand to 1
(or something else that doesn't optimize away) instead? Dropping it is not really the point of this tests.
524bf49
to
f98b584
Compare
Reviewed, thanks. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
and
/or
on demanded bitsand
/or
/xor
on demanded bits
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
f98b584
to
3b03a6a
Compare
Simplify multi-use `and`/`or`/`xor` when these last do not affect the demanded bits being considered. Fixes: llvm#78596. Proofs: https://alive2.llvm.org/ce/z/EjuWHa.
3b03a6a
to
2073782
Compare
Simplify multi-use
and
oror
s when these last do not affect the demanded bits being considered.Fixes: #78596.
Proofs: https://alive2.llvm.org/ce/z/kmVqE-.