diff --git a/llvm/lib/Transforms/Scalar/BDCE.cpp b/llvm/lib/Transforms/Scalar/BDCE.cpp index 1fa2c75b0f42a..e99210ce2f228 100644 --- a/llvm/lib/Transforms/Scalar/BDCE.cpp +++ b/llvm/lib/Transforms/Scalar/BDCE.cpp @@ -23,10 +23,13 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" + using namespace llvm; +using namespace PatternMatch; #define DEBUG_TYPE "bdce" @@ -125,6 +128,38 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) { } } + // Simplify and, or, xor when their mask does not affect the demanded bits. + if (auto *BO = dyn_cast<BinaryOperator>(&I)) { + APInt Demanded = DB.getDemandedBits(BO); + if (!Demanded.isAllOnes()) { + const APInt *Mask; + if (match(BO->getOperand(1), m_APInt(Mask))) { + bool CanBeSimplified = false; + switch (BO->getOpcode()) { + case Instruction::Or: + case Instruction::Xor: + CanBeSimplified = !Demanded.intersects(*Mask); + break; + case Instruction::And: + CanBeSimplified = Demanded.isSubsetOf(*Mask); + break; + default: + // TODO: Handle more cases here. + break; + } + + if (CanBeSimplified) { + clearAssumptionsOfUsers(BO, DB); + BO->replaceAllUsesWith(BO->getOperand(0)); + Worklist.push_back(BO); + ++NumSimplified; + Changed = true; + continue; + } + } + } + } + for (Use &U : I.operands()) { // DemandedBits only detects dead integer uses.
if (!U->getType()->isIntOrIntVectorTy()) diff --git a/llvm/test/Transforms/BDCE/binops-multiuse.ll b/llvm/test/Transforms/BDCE/binops-multiuse.ll new file mode 100644 index 0000000000000..0c03ca4d6fc57 --- /dev/null +++ b/llvm/test/Transforms/BDCE/binops-multiuse.ll @@ -0,0 +1,265 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -passes=bdce < %s | FileCheck %s + +define void @or(i64 %a) { +; CHECK-LABEL: define void @or( +; CHECK-SAME: i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RET1:%.*]] = and i64 [[A]], 8 +; CHECK-NEXT: [[RET2:%.*]] = and i64 [[A]], 16 +; CHECK-NEXT: call void @use(i64 [[RET1]]) +; CHECK-NEXT: call void @use(i64 [[RET2]]) +; CHECK-NEXT: ret void +; +entry: + %or = or i64 %a, 3 ; Mask: 0000 0011 + %ret1 = and i64 %or, 8 ; Demanded bits: 0000 1000 + %ret2 = and i64 %or, 16 ; Demanded bits: 0001 0000 + call void @use(i64 %ret1) + call void @use(i64 %ret2) + ret void +} + +define void @xor(i64 %a) { +; CHECK-LABEL: define void @xor( +; CHECK-SAME: i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RET1:%.*]] = and i64 [[A]], 8 +; CHECK-NEXT: [[RET2:%.*]] = and i64 [[A]], 16 +; CHECK-NEXT: call void @use(i64 [[RET1]]) +; CHECK-NEXT: call void @use(i64 [[RET2]]) +; CHECK-NEXT: ret void +; +entry: + %xor = xor i64 %a, 3 ; Mask: 0000 0011 + %ret1 = and i64 %xor, 8 ; Demanded bits: 0000 1000 + %ret2 = and i64 %xor, 16 ; Demanded bits: 0001 0000 + call void @use(i64 %ret1) + call void @use(i64 %ret2) + ret void +} + +define void @and(i64 %a) { +; CHECK-LABEL: define void @and( +; CHECK-SAME: i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RET1:%.*]] = and i64 [[A]], 8 +; CHECK-NEXT: [[RET2:%.*]] = and i64 [[A]], 16 +; CHECK-NEXT: call void @use(i64 [[RET1]]) +; CHECK-NEXT: call void @use(i64 [[RET2]]) +; CHECK-NEXT: ret void +; +entry: + %and = and i64 %a, 24 ; Mask: 0001 1000 + %ret1 = and i64 %and, 8 ; Demanded bits: 0000 1000 + %ret2 = and i64 %and, 16 ; 
Demanded bits: 0001 0000 + call void @use(i64 %ret1) + call void @use(i64 %ret2) + ret void +} + +define void @or_of_and(i64 %a, i64 %b) { +; CHECK-LABEL: define void @or_of_and( +; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[OR:%.*]] = or i64 [[A]], [[B]] +; CHECK-NEXT: [[RET1:%.*]] = and i64 [[OR]], 8 +; CHECK-NEXT: [[RET2:%.*]] = and i64 [[OR]], 16 +; CHECK-NEXT: call void @use(i64 [[RET1]]) +; CHECK-NEXT: call void @use(i64 [[RET2]]) +; CHECK-NEXT: ret void +; +entry: + %and1 = and i64 %a, 24 ; Mask: 0001 1000 + %and2 = and i64 %b, 25 ; Mask: 0001 1001 + %or = or i64 %and1, %and2 + %ret1 = and i64 %or, 8 ; Demanded bits: 0000 1000 + %ret2 = and i64 %or, 16 ; Demanded bits: 0001 0000 + call void @use(i64 %ret1) + call void @use(i64 %ret2) + ret void +} + +define void @or_disjoint_of_and(i64 %a, i64 %b) { +; CHECK-LABEL: define void @or_disjoint_of_and( +; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[OR:%.*]] = or i64 [[A]], [[B]] +; CHECK-NEXT: [[RET1:%.*]] = and i64 [[OR]], 8 +; CHECK-NEXT: [[RET2:%.*]] = and i64 [[OR]], 16 +; CHECK-NEXT: call void @use(i64 [[RET1]]) +; CHECK-NEXT: call void @use(i64 [[RET2]]) +; CHECK-NEXT: ret void +; +entry: + %and1 = and i64 %a, 56 ; Mask: 0011 1000 + %and2 = and i64 %b, 25 ; Mask: 0001 1001 + %or = or disjoint i64 %and1, %and2 + %ret1 = and i64 %or, 8 ; Demanded bits: 0000 1000 + %ret2 = and i64 %or, 16 ; Demanded bits: 0001 0000 + call void @use(i64 %ret1) + call void @use(i64 %ret2) + ret void +} + +define void @select_of_and(i1 %c, i64 %a, i64 %b) { +; CHECK-LABEL: define void @select_of_and( +; CHECK-SAME: i1 [[C:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i64 [[A]], i64 [[B]] +; CHECK-NEXT: [[RET1:%.*]] = and i64 [[S]], 8 +; CHECK-NEXT: [[RET2:%.*]] = and i64 [[S]], 16 +; CHECK-NEXT: call void @use(i64 [[RET1]]) +; CHECK-NEXT: call void @use(i64 [[RET2]]) +; CHECK-NEXT: ret void +; 
+entry: + %and1 = and i64 %a, 24 ; Mask: 0001 1000 + %and2 = and i64 %b, 25 ; Mask: 0001 1001 + %s = select i1 %c, i64 %and1, i64 %and2 + %ret1 = and i64 %s, 8 ; Demanded bits: 0000 1000 + %ret2 = and i64 %s, 16 ; Demanded bits: 0001 0000 + call void @use(i64 %ret1) + call void @use(i64 %ret2) + ret void +} + +define void @select_of_and_2(i1 %c, i64 %a, i64 %b) { +; CHECK-LABEL: define void @select_of_and_2( +; CHECK-SAME: i1 [[C:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AND2:%.*]] = and i64 [[B]], 23 +; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i64 [[A]], i64 [[AND2]] +; CHECK-NEXT: [[RET1:%.*]] = and i64 [[S]], 8 +; CHECK-NEXT: [[RET2:%.*]] = and i64 [[S]], 16 +; CHECK-NEXT: call void @use(i64 [[RET1]]) +; CHECK-NEXT: call void @use(i64 [[RET2]]) +; CHECK-NEXT: ret void +; +entry: + %and1 = and i64 %a, 25 ; Mask: 0001 1001 + %and2 = and i64 %b, 23 ; Mask: 0001 0111 + %s = select i1 %c, i64 %and1, i64 %and2 + %ret1 = and i64 %s, 8 ; Demanded bits: 0000 1000 + %ret2 = and i64 %s, 16 ; Demanded bits: 0001 0000 + call void @use(i64 %ret1) + call void @use(i64 %ret2) + ret void +} + +define void @select_of_and_multiuse(i1 %c, i64 %a, i64 %b) { +; CHECK-LABEL: define void @select_of_and_multiuse( +; CHECK-SAME: i1 [[C:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AND2:%.*]] = and i64 [[B]], 25 +; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i64 [[A]], i64 [[AND2]] +; CHECK-NEXT: [[RET1:%.*]] = and i64 [[S]], 8 +; CHECK-NEXT: [[RET2:%.*]] = and i64 [[S]], 16 +; CHECK-NEXT: call void @use(i64 [[RET1]]) +; CHECK-NEXT: call void @use2(i64 [[RET2]], i64 [[AND2]]) +; CHECK-NEXT: ret void +; +entry: + %and1 = and i64 %a, 24 ; Mask: 0001 1000 + %and2 = and i64 %b, 25 ; Mask: 0001 1001 + %s = select i1 %c, i64 %and1, i64 %and2 + %ret1 = and i64 %s, 8 ; Demanded bits: 0000 1000 + %ret2 = and i64 %s, 16 ; Demanded bits: 0001 0000 + call void @use(i64 %ret1) + call void @use2(i64 %ret2, i64 %and2) + ret void +} + 
+define void @select_of_and_different_demanded(i1 %c, i64 %a, i64 %b) { +; CHECK-LABEL: define void @select_of_and_different_demanded( +; CHECK-SAME: i1 [[C:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AND1:%.*]] = and i64 0, 24 +; CHECK-NEXT: [[AND2:%.*]] = and i64 [[B]], 25 +; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i64 [[AND1]], i64 [[AND2]] +; CHECK-NEXT: [[RET1:%.*]] = and i64 [[S]], 3 +; CHECK-NEXT: [[RET2:%.*]] = and i64 [[S]], 7 +; CHECK-NEXT: call void @use(i64 [[RET1]]) +; CHECK-NEXT: call void @use(i64 [[RET2]]) +; CHECK-NEXT: ret void +; +entry: + %and1 = and i64 %a, 24 ; Mask: 0001 1000 + %and2 = and i64 %b, 25 ; Mask: 0001 1001 + %s = select i1 %c, i64 %and1, i64 %and2 + %ret1 = and i64 %s, 3 ; Demanded bits: 0000 0011 + %ret2 = and i64 %s, 7 ; Demanded bits: 0000 0111 + call void @use(i64 %ret1) + call void @use(i64 %ret2) + ret void +} + +define void @select_of_or(i1 %c, i64 %a, i64 %b) { +; CHECK-LABEL: define void @select_of_or( +; CHECK-SAME: i1 [[C:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i64 [[A]], i64 [[B]] +; CHECK-NEXT: [[RET1:%.*]] = and i64 [[S]], 8 +; CHECK-NEXT: [[RET2:%.*]] = and i64 [[S]], 16 +; CHECK-NEXT: call void @use(i64 [[RET1]]) +; CHECK-NEXT: call void @use(i64 [[RET2]]) +; CHECK-NEXT: ret void +; +entry: + %or1 = or i64 %a, 3 ; Mask: 0000 0011 + %or2 = or i64 %b, 192 ; Mask: 1100 0000 + %s = select i1 %c, i64 %or1, i64 %or2 + %ret1 = and i64 %s, 8 ; Demanded bits: 0000 1000 + %ret2 = and i64 %s, 16 ; Demanded bits: 0001 0000 + call void @use(i64 %ret1) + call void @use(i64 %ret2) + ret void +} + +define void @select_of_xor(i1 %c, i64 %a, i64 %b) { +; CHECK-LABEL: define void @select_of_xor( +; CHECK-SAME: i1 [[C:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i64 [[A]], i64 [[B]] +; CHECK-NEXT: [[RET1:%.*]] = and i64 [[S]], 8 +; CHECK-NEXT: [[RET2:%.*]] = and i64 [[S]], 16 
+; CHECK-NEXT: call void @use(i64 [[RET1]]) +; CHECK-NEXT: call void @use(i64 [[RET2]]) +; CHECK-NEXT: ret void +; +entry: + %xor1 = xor i64 %a, 128 ; Mask: 1000 0000 + %xor2 = xor i64 %b, 36 ; Mask: 0010 0100 + %s = select i1 %c, i64 %xor1, i64 %xor2 + %ret1 = and i64 %s, 8 ; Demanded bits: 0000 1000 + %ret2 = and i64 %s, 16 ; Demanded bits: 0001 0000 + call void @use(i64 %ret1) + call void @use(i64 %ret2) + ret void +} + +define void @select_vectorized(i1 %c, <2 x i8> %a, <2 x i8> %b) { +; CHECK-LABEL: define void @select_vectorized( +; CHECK-SAME: i1 [[C:%.*]], <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], <2 x i8> [[A]], <2 x i8> [[B]] +; CHECK-NEXT: [[RET1:%.*]] = and <2 x i8> [[S]], <i8 8, i8 8> +; CHECK-NEXT: [[RET2:%.*]] = and <2 x i8> [[S]], <i8 16, i8 16> +; CHECK-NEXT: call void @use3(<2 x i8> [[RET1]]) +; CHECK-NEXT: call void @use3(<2 x i8> [[RET2]]) +; CHECK-NEXT: ret void +; +entry: + %and1 = and <2 x i8> %a, <i8 24, i8 24> + %and2 = and <2 x i8> %b, <i8 25, i8 25> + %s = select i1 %c, <2 x i8> %and1, <2 x i8> %and2 + %ret1 = and <2 x i8> %s, <i8 8, i8 8> + %ret2 = and <2 x i8> %s, <i8 16, i8 16> + call void @use3(<2 x i8> %ret1) + call void @use3(<2 x i8> %ret2) + ret void +} + +declare void @use(i64) +declare void @use2(i64, i64) +declare void @use3(<2 x i8>) diff --git a/llvm/test/Transforms/BDCE/dead-uses.ll b/llvm/test/Transforms/BDCE/dead-uses.ll index 1a19ff6fb11d5..85ee0dd8f2b90 100644 --- a/llvm/test/Transforms/BDCE/dead-uses.ll +++ b/llvm/test/Transforms/BDCE/dead-uses.ll @@ -9,14 +9,14 @@ declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) ; First fshr operand is dead.
define i32 @pr39771_fshr_multi_use_instr(i32 %a) { ; CHECK-LABEL: @pr39771_fshr_multi_use_instr( -; CHECK-NEXT: [[X:%.*]] = or i32 [[A:%.*]], 0 +; CHECK-NEXT: [[X:%.*]] = or i32 [[A:%.*]], 2 ; CHECK-NEXT: [[B:%.*]] = tail call i32 @llvm.fshr.i32(i32 0, i32 [[X]], i32 1) ; CHECK-NEXT: [[C:%.*]] = lshr i32 [[B]], 23 ; CHECK-NEXT: [[D:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[E:%.*]] = and i32 [[D]], 31 ; CHECK-NEXT: ret i32 [[E]] ; - %x = or i32 %a, 0 + %x = or i32 %a, 2 %b = tail call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 1) %c = lshr i32 %b, 23 %d = xor i32 %c, %b @@ -27,14 +27,14 @@ define i32 @pr39771_fshr_multi_use_instr(i32 %a) { ; First fshr operand is dead (vector variant). define <2 x i32> @pr39771_fshr_multi_use_instr_vec(<2 x i32> %a) { ; CHECK-LABEL: @pr39771_fshr_multi_use_instr_vec( -; CHECK-NEXT: [[X:%.*]] = or <2 x i32> [[A:%.*]], zeroinitializer +; CHECK-NEXT: [[X:%.*]] = or <2 x i32> [[A:%.*]], <i32 2, i32 2> ; CHECK-NEXT: [[B:%.*]] = tail call <2 x i32> @llvm.fshr.v2i32(<2 x i32> zeroinitializer, <2 x i32> [[X]], <2 x i32> <i32 1, i32 1>) ; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], <i32 23, i32 23> ; CHECK-NEXT: [[D:%.*]] = xor <2 x i32> [[C]], [[B]] ; CHECK-NEXT: [[E:%.*]] = and <2 x i32> [[D]], <i32 31, i32 31> ; CHECK-NEXT: ret <2 x i32> [[E]] ; - %x = or <2 x i32> %a, zeroinitializer + %x = or <2 x i32> %a, <i32 2, i32 2> %b = tail call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 1, i32 1>) %c = lshr <2 x i32> %b, <i32 23, i32 23> %d = xor <2 x i32> %c, %b