Skip to content

Commit

Permalink
[InstCombine] remove a buggy set of zext-icmp transforms
Browse files Browse the repository at this point in the history
The motivating case is an infinite loop shown with a reduced test from:
https://llvm.org/PR51762

To solve this, I'm proposing we delete the most obviously broken part of this code.

The bug example shows a fundamental problem: we ask computeKnownBits if a transform
will be profitable, alter the code by creating new instructions, then rely on
computeKnownBits to return the same answer to actually eliminate instructions.

But there's no guarantee that the results will be the same between the 1st and 2nd
calls. In the infinite loop example, we get different answers, so we add
instructions that conflict with some other transform, and we're stuck.

There's at least one other problem visible in the test diff for
`@zext_or_masked_bit_test_uses`: the code doesn't check uses properly, so we can
end up with extra instructions created.

Last, it's not clear if this set of transforms actually improves analysis or
codegen. I spot-checked a few targets and don't see a clear win:
https://godbolt.org/z/x87EWovso

If we do see a regression from this change, codegen seems like the right place to
add a cmp -> bit-hack fold.

If this is too big of a step, we could limit the computeKnownBits calls by not
passing a context instruction and/or limiting the recursion. I checked that those
would stop the infinite loop for PR51762, but that won't guarantee that some other
example does not fall into the same loop.

Differential Revision: https://reviews.llvm.org/D109440
  • Loading branch information
rotateright committed Sep 9, 2021
1 parent 7fc743f commit 97a4e7b
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 69 deletions.
48 changes: 3 additions & 45 deletions llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
Expand Up @@ -978,11 +978,7 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
return nullptr;
}

/// Transform (zext icmp) to bitwise / integer operations in order to
/// eliminate it. If DoTransform is false, just test whether the given
/// (zext icmp) can be transformed.
Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
bool DoTransform) {
Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext) {
// If we are just checking for a icmp eq of a single bit and zext'ing it
// to an integer, then shift the bit to the appropriate place and then
// cast to integer to avoid the comparison.
Expand All @@ -993,8 +989,6 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
// zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
if ((Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isNullValue()) ||
(Cmp->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnesValue())) {
if (!DoTransform) return Cmp;

Value *In = Cmp->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
In->getType()->getScalarSizeInBits() - 1);
Expand Down Expand Up @@ -1026,8 +1020,6 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,

APInt KnownZeroMask(~Known.Zero);
if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
if (!DoTransform) return Cmp;

bool isNE = Cmp->getPredicate() == ICmpInst::ICMP_NE;
if (!Op1CV->isNullValue() && (*Op1CV != KnownZeroMask)) {
// (X&4) == 2 --> false
Expand Down Expand Up @@ -1067,9 +1059,6 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
match(Cmp->getOperand(0),
m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
if (!DoTransform)
return Cmp;

if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
X = Builder.CreateNot(X);
Value *Lshr = Builder.CreateLShr(X, ShAmt);
Expand All @@ -1091,8 +1080,6 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
APInt KnownBits = KnownLHS.Zero | KnownLHS.One;
APInt UnknownBit = ~KnownBits;
if (UnknownBit.countPopulation() == 1) {
if (!DoTransform) return Cmp;

Value *Result = Builder.CreateXor(LHS, RHS);

// Mask off any bits that are set and won't be shifted away.
Expand Down Expand Up @@ -1330,45 +1317,16 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Src))
return transformZExtICmp(Cmp, CI);

BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
if (SrcI && SrcI->getOpcode() == Instruction::Or) {
// zext (or icmp, icmp) -> or (zext icmp), (zext icmp) if at least one
// of the (zext icmp) can be eliminated. If so, immediately perform the
// according elimination.
ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType() &&
(transformZExtICmp(LHS, CI, false) ||
transformZExtICmp(RHS, CI, false))) {
// zext (or icmp, icmp) -> or (zext icmp), (zext icmp)
Value *LCast = Builder.CreateZExt(LHS, CI.getType(), LHS->getName());
Value *RCast = Builder.CreateZExt(RHS, CI.getType(), RHS->getName());
Value *Or = Builder.CreateOr(LCast, RCast, CI.getName());
if (auto *OrInst = dyn_cast<Instruction>(Or))
Builder.SetInsertPoint(OrInst);

// Perform the elimination.
if (auto *LZExt = dyn_cast<ZExtInst>(LCast))
transformZExtICmp(LHS, *LZExt);
if (auto *RZExt = dyn_cast<ZExtInst>(RCast))
transformZExtICmp(RHS, *RZExt);

return replaceInstUsesWith(CI, Or);
}
}

// zext(trunc(X) & C) -> (X & zext(C)).
Constant *C;
Value *X;
if (SrcI &&
match(SrcI, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
X->getType() == CI.getType())
return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, CI.getType()));

// zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
Value *And;
if (SrcI && match(SrcI, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
X->getType() == CI.getType()) {
Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
Expand Down
8 changes: 2 additions & 6 deletions llvm/lib/Transforms/InstCombine/InstCombineInternal.h
Expand Up @@ -241,15 +241,11 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
///
/// \param ICI The icmp of the (zext icmp) pair we are interested in.
/// \parem CI The zext of the (zext icmp) pair we are interested in.
/// \param DoTransform Pass false to just test whether the given (zext icmp)
/// would be transformed. Pass true to actually perform the transformation.
///
/// \return null if the transformation cannot be performed. If the
/// transformation can be performed the new instruction that replaces the
/// (zext icmp) pair will be returned (if \p DoTransform is false the
/// unmodified \p ICI will be returned in this case).
Instruction *transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
bool DoTransform = true);
/// (zext icmp) pair will be returned.
Instruction *transformZExtICmp(ICmpInst *ICI, ZExtInst &CI);

Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);

Expand Down
90 changes: 84 additions & 6 deletions llvm/test/Transforms/InstCombine/zext-or-icmp.ll
@@ -1,16 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; Remove an icmp by using its operand in the subsequent logic directly.

define i8 @zext_or_icmp_icmp(i8 %a, i8 %b) {
; CHECK-LABEL: @zext_or_icmp_icmp(
; CHECK-NEXT: [[MASK:%.*]] = and i8 [[A:%.*]], 1
; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp eq i8 [[MASK]], 0
; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp eq i8 [[B:%.*]], 0
; CHECK-NEXT: [[TOBOOL22:%.*]] = zext i1 [[TOBOOL2]] to i8
; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[MASK]], 1
; CHECK-NEXT: [[ZEXT3:%.*]] = or i8 [[TMP1]], [[TOBOOL22]]
; CHECK-NEXT: ret i8 [[ZEXT3]]
; CHECK-NEXT: [[BOTHCOND:%.*]] = or i1 [[TOBOOL1]], [[TOBOOL2]]
; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[BOTHCOND]] to i8
; CHECK-NEXT: ret i8 [[ZEXT]]
;
%mask = and i8 %a, 1
%toBool1 = icmp eq i8 %mask, 0
Expand Down Expand Up @@ -165,4 +163,84 @@ define i8 @PR49475_infloop(i32 %t0, i16 %insert, i64 %e, i8 %i162) {
ret i8 %inc
}

; This would infinite loop because knownbits changed between checking
; if a transform was profitable and actually doing the transform.

define i1 @PR51762(i32 *%i, i32 %t0, i16 %t1, i64* %p, i32* %d, i32* %f, i32 %p2) {
; CHECK-LABEL: @PR51762(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[I_SROA_8_0:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[I_SROA_8_0_EXTRACT_TRUNC:%.*]], [[COND_TRUE:%.*]] ]
; CHECK-NEXT: br i1 undef, label [[COND_TRUE]], label [[FOR_END11:%.*]]
; CHECK: cond.true:
; CHECK-NEXT: [[I_SROA_8_0_EXTRACT_TRUNC]] = ashr i32 [[T0:%.*]], 31
; CHECK-NEXT: br label [[FOR_COND]]
; CHECK: for.end11:
; CHECK-NEXT: [[S1:%.*]] = sext i16 [[T1:%.*]] to i64
; CHECK-NEXT: [[SROA38:%.*]] = load i32, i32* [[I:%.*]], align 8
; CHECK-NEXT: [[INSERT_EXT51:%.*]] = zext i32 [[I_SROA_8_0]] to i64
; CHECK-NEXT: [[INSERT_SHIFT52:%.*]] = shl nuw i64 [[INSERT_EXT51]], 32
; CHECK-NEXT: [[INSERT_EXT39:%.*]] = zext i32 [[SROA38]] to i64
; CHECK-NEXT: [[INSERT_INSERT41:%.*]] = or i64 [[INSERT_SHIFT52]], [[INSERT_EXT39]]
; CHECK-NEXT: [[REM:%.*]] = urem i64 [[S1]], [[INSERT_INSERT41]]
; CHECK-NEXT: [[NE:%.*]] = icmp ne i64 [[REM]], 0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[INSERT_INSERT41]], 0
; CHECK-NEXT: [[SPEC_SELECT57:%.*]] = or i1 [[NE]], [[CMP]]
; CHECK-NEXT: [[LOR_EXT:%.*]] = zext i1 [[SPEC_SELECT57]] to i32
; CHECK-NEXT: [[T2:%.*]] = load i32, i32* [[D:%.*]], align 4
; CHECK-NEXT: [[CONV15:%.*]] = sext i16 [[T1]] to i32
; CHECK-NEXT: [[CMP16:%.*]] = icmp sge i32 [[T2]], [[CONV15]]
; CHECK-NEXT: [[CONV17:%.*]] = zext i1 [[CMP16]] to i32
; CHECK-NEXT: [[T3:%.*]] = load i32, i32* [[F:%.*]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[T3]], [[CONV17]]
; CHECK-NEXT: store i32 [[ADD]], i32* [[F]], align 4
; CHECK-NEXT: [[REM18:%.*]] = srem i32 [[LOR_EXT]], [[ADD]]
; CHECK-NEXT: [[CONV19:%.*]] = zext i32 [[REM18]] to i64
; CHECK-NEXT: store i32 0, i32* [[D]], align 8
; CHECK-NEXT: [[R:%.*]] = icmp ult i64 [[INSERT_INSERT41]], [[CONV19]]
; CHECK-NEXT: call void @llvm.assume(i1 [[R]])
; CHECK-NEXT: ret i1 true
;
entry:
br label %for.cond

for.cond:
%i.sroa.8.0 = phi i32 [ undef, %entry ], [ %i.sroa.8.0.extract.trunc, %cond.true ]
br i1 undef, label %cond.true, label %for.end11

cond.true:
%i.sroa.8.0.extract.trunc = ashr i32 %t0, 31
br label %for.cond

for.end11:
%s1 = sext i16 %t1 to i64
%sroa38 = load i32, i32* %i, align 8
%insert.ext51 = zext i32 %i.sroa.8.0 to i64
%insert.shift52 = shl nuw i64 %insert.ext51, 32
%insert.ext39 = zext i32 %sroa38 to i64
%insert.insert41 = or i64 %insert.shift52, %insert.ext39
%rem = urem i64 %s1, %insert.insert41
%ne = icmp ne i64 %rem, 0
%cmp = icmp eq i64 %insert.insert41, 0
%spec.select57 = or i1 %ne, %cmp

%lor.ext = zext i1 %spec.select57 to i32
%t2 = load i32, i32* %d, align 4
%conv15 = sext i16 %t1 to i32
%cmp16 = icmp sge i32 %t2, %conv15
%conv17 = zext i1 %cmp16 to i32
%t3 = load i32, i32* %f, align 4
%add = add nsw i32 %t3, %conv17
store i32 %add, i32* %f, align 4
%rem18 = srem i32 %lor.ext, %add
%conv19 = zext i32 %rem18 to i64
%div = udiv i64 %insert.insert41, %conv19
%trunc33 = trunc i64 %div to i32
store i32 %trunc33, i32* %d, align 8
%r = icmp ult i64 %insert.insert41, %conv19
call void @llvm.assume(i1 %r)
ret i1 %r
}

declare void @llvm.assume(i1 noundef)
21 changes: 9 additions & 12 deletions llvm/test/Transforms/InstCombine/zext.ll
Expand Up @@ -410,17 +410,15 @@ define i32 @masked_bit_wrong_pred(i32 %x, i32 %y) {
ret i32 %r
}

; Assert that zext(or(masked_bit_test, icmp)) can be correctly transformed to
; or(shifted_masked_bit, zext(icmp))

define i32 @zext_or_masked_bit_test(i32 %a, i32 %b, i32 %x) {
; CHECK-LABEL: @zext_or_masked_bit_test(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[A:%.*]], [[B]]
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1
; CHECK-NEXT: [[CMP2:%.*]] = zext i1 [[CMP]] to i32
; CHECK-NEXT: [[Z3:%.*]] = or i32 [[TMP2]], [[CMP2]]
; CHECK-NEXT: ret i32 [[Z3]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[B:%.*]]
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[A:%.*]]
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND]], 0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[B]]
; CHECK-NEXT: [[OR:%.*]] = or i1 [[TOBOOL]], [[CMP]]
; CHECK-NEXT: [[Z:%.*]] = zext i1 [[OR]] to i32
; CHECK-NEXT: ret i32 [[Z]]
;
%shl = shl i32 1, %b
%and = and i32 %shl, %a
Expand All @@ -439,9 +437,8 @@ define i32 @zext_or_masked_bit_test_uses(i32 %a, i32 %b, i32 %x) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[B]]
; CHECK-NEXT: [[OR:%.*]] = or i1 [[TOBOOL]], [[CMP]]
; CHECK-NEXT: call void @use1(i1 [[OR]])
; CHECK-NEXT: [[Z34:%.*]] = or i1 [[TOBOOL]], [[CMP]]
; CHECK-NEXT: [[Z3:%.*]] = zext i1 [[Z34]] to i32
; CHECK-NEXT: ret i32 [[Z3]]
; CHECK-NEXT: [[Z:%.*]] = zext i1 [[OR]] to i32
; CHECK-NEXT: ret i32 [[Z]]
;
%shl = shl i32 1, %b
%and = and i32 %shl, %a
Expand Down

0 comments on commit 97a4e7b

Please sign in to comment.