Skip to content

Commit

Permalink
[InstSimplify] Remove the remainder loop if we know the mask is alway…
Browse files Browse the repository at this point in the history
…s true

In D146199 we check whether the loop trip count is known to be a power of 2
to determine whether the tail loop can be eliminated.
However, the remainder loop of a masked scalable loop can also be removed
if we know the mask is always going to be true for every vector iteration.
This depends on the power-of-two vscale assumption added in D155350.

proofs: https://alive2.llvm.org/ce/z/FkTMoy

Fix #63616.

Reviewed By: goldstein.w.n, nikic, david-arm, paulwalker-arm
Differential Revision: https://reviews.llvm.org/D154953
  • Loading branch information
vfdff committed Aug 1, 2023
1 parent 44d14a1 commit 3e386b2
Show file tree
Hide file tree
Showing 5 changed files with 265 additions and 429 deletions.
29 changes: 29 additions & 0 deletions llvm/lib/Analysis/InstructionSimplify.cpp
Expand Up @@ -78,6 +78,9 @@ static Value *simplifyInstructionWithOperands(Instruction *I,
ArrayRef<Value *> NewOps,
const SimplifyQuery &SQ,
unsigned MaxRecurse);
// Forward declaration of a file-local helper so that simplifyAndInst can call
// it before its definition. The caller passes an integer comparison
// (Predicate, LHS, RHS) plus the current SimplifyQuery, treats a null result
// as "could not simplify", and treats a non-null result as a ConstantInt.
// NOTE(review): per its name this presumably resolves the comparison from
// llvm.assume calls dominating the query's context instruction -- confirm
// against the definition.
static Value *simplifyICmpWithDominatingAssume(CmpInst::Predicate Predicate,
Value *LHS, Value *RHS,
const SimplifyQuery &Q);

static Value *foldSelectWithBinaryOp(Value *Cond, Value *TrueVal,
Value *FalseVal) {
Expand Down Expand Up @@ -2116,6 +2119,32 @@ static Value *simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return Op0;
}

// and 2^x-1, 2^C --> 0 where x <= C.
const APInt *PowerC;
Value *Shift;
if (match(Op1, m_Power2(PowerC)) &&
match(Op0, m_Add(m_Value(Shift), m_AllOnes())) &&
isKnownToBeAPowerOfTwo(Shift, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI,
Q.DT)) {
KnownBits Known = computeKnownBits(Shift, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
// Use getActiveBits() to make use of the additional power of two knowledge
if (PowerC->getActiveBits() >= Known.getMaxValue().getActiveBits())
return ConstantInt::getNullValue(Op1->getType());
}

// not (-1 << x), 1 << C --> 0 where x <= C.
// InstCombine's canonicalizeLowbitMask folds (1 << x) - 1 into ~(-1 << x),
// so the low-bit mask reaches us in this form.
if (match(Op1, m_Power2(PowerC)) && match(Op0, m_Not(m_Value(Shift))) &&
match(Shift, m_Shl(m_AllOnes(), m_Value(X)))) {
Value *ShiftC = ConstantInt::get(Op1->getType(), PowerC->logBase2());
if (auto *V =
simplifyICmpWithDominatingAssume(CmpInst::ICMP_ULE, X, ShiftC, Q)) {
auto *CV = cast<ConstantInt>(V);
if (CV->isOne())
return ConstantInt::getNullValue(Op1->getType());
}
}

// If we have a multiplication overflow check that is being 'and'ed with a
// check that one of the multipliers is not zero, we can omit the 'and', and
// only keep the overflow check.
Expand Down
10 changes: 2 additions & 8 deletions llvm/test/Transforms/InstCombine/and-add-shl.ll
Expand Up @@ -10,10 +10,7 @@ define i8 @and_add_shl(i8 %x) {
; CHECK-SAME: (i8 [[X:%.*]]) {
; CHECK-NEXT: [[OP1_P2:%.*]] = icmp ult i8 [[X]], 6
; CHECK-NEXT: call void @llvm.assume(i1 [[OP1_P2]])
; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[X]]
; CHECK-NEXT: [[SUB:%.*]] = and i8 [[NOTMASK]], 32
; CHECK-NEXT: [[R:%.*]] = xor i8 [[SUB]], 32
; CHECK-NEXT: ret i8 [[R]]
; CHECK-NEXT: ret i8 0
;
%op1_p2 = icmp ule i8 %x, 5
call void @llvm.assume(i1 %op1_p2)
Expand All @@ -29,10 +26,7 @@ define i8 @and_not_shl(i8 %x) {
; CHECK-SAME: (i8 [[X:%.*]]) {
; CHECK-NEXT: [[OP1_P2:%.*]] = icmp ult i8 [[X]], 6
; CHECK-NEXT: call void @llvm.assume(i1 [[OP1_P2]])
; CHECK-NEXT: [[SHIFT:%.*]] = shl i8 -1, [[X]]
; CHECK-NEXT: [[NOT:%.*]] = and i8 [[SHIFT]], 32
; CHECK-NEXT: [[R:%.*]] = xor i8 [[NOT]], 32
; CHECK-NEXT: ret i8 [[R]]
; CHECK-NEXT: ret i8 0
;
%op1_p2 = icmp ule i8 %x, 5
call void @llvm.assume(i1 %op1_p2)
Expand Down
12 changes: 2 additions & 10 deletions llvm/test/Transforms/InstCombine/rem-mul-shl.ll
Expand Up @@ -843,11 +843,7 @@ define i64 @urem_shl_vscale() {

define i64 @urem_shl_vscale_range() vscale_range(1,16) {
; CHECK-LABEL: @urem_shl_vscale_range(
; CHECK-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[SHIFT:%.*]] = shl nuw nsw i64 [[VSCALE]], 2
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[SHIFT]], 2047
; CHECK-NEXT: [[REM:%.*]] = and i64 [[TMP1]], 1024
; CHECK-NEXT: ret i64 [[REM]]
; CHECK-NEXT: ret i64 0
;
%vscale = call i64 @llvm.vscale.i64()
%shift = shl nuw nsw i64 %vscale, 2
Expand All @@ -857,11 +853,7 @@ define i64 @urem_shl_vscale_range() vscale_range(1,16) {

define i64 @urem_vscale_range() vscale_range(1,16) {
; CHECK-LABEL: @urem_vscale_range(
; CHECK-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[SHIFT:%.*]] = shl nuw nsw i64 [[VSCALE]], 6
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[SHIFT]], 2047
; CHECK-NEXT: [[REM:%.*]] = and i64 [[TMP1]], 1024
; CHECK-NEXT: ret i64 [[REM]]
; CHECK-NEXT: ret i64 0
;
%vscale = call i64 @llvm.vscale.i64()
%shift = shl nuw nsw i64 %vscale, 6
Expand Down

0 comments on commit 3e386b2

Please sign in to comment.