Skip to content

Commit

Permalink
[x86] Fix infinite loop inside DAG combiner with lzcnt feature.
Browse files Browse the repository at this point in the history
The issue affects targets that support fast-lzcnt, such as btver2.
This change removes extraneous zext/trunc node insertions to fix the
infinite loop.
This fixes Issue #54694.

Differential Revision: https://reviews.llvm.org/D122900

Reviewed By: RKSimon, spatel, lebedev.ri
  • Loading branch information
goussepi committed Apr 5, 2022
1 parent d849959 commit a3d5f1c
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 19 deletions.
21 changes: 8 additions & 13 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -47606,8 +47606,7 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
// into:
// srl(ctlz x), log2(bitsize(x))
// Input pattern is checked by caller.
static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy,
SelectionDAG &DAG) {
static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) {
SDValue Cmp = Op.getOperand(1);
EVT VT = Cmp.getOperand(0).getValueType();
unsigned Log2b = Log2_32(VT.getSizeInBits());
Expand All @@ -47618,7 +47617,7 @@ static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy,
SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32);
SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc,
DAG.getConstant(Log2b, dl, MVT::i8));
return DAG.getZExtOrTrunc(Scc, dl, ExtTy);
return Scc;
}

// Try to transform:
Expand Down Expand Up @@ -47678,11 +47677,10 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
// or(srl(ctlz),srl(ctlz)).
// The dag combiner can then fold it into:
// srl(or(ctlz, ctlz)).
EVT VT = OR->getValueType(0);
SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, VT, DAG);
SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, DAG);
SDValue Ret, NewRHS;
if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG)))
Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, NewLHS, NewRHS);
if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG)))
Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, NewLHS, NewRHS);

if (!Ret)
return SDValue();
Expand All @@ -47695,16 +47693,13 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
// Swap rhs with lhs to match or(setcc(eq, cmp, 0), or).
if (RHS->getOpcode() == ISD::OR)
std::swap(LHS, RHS);
NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG);
NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG);
if (!NewRHS)
return SDValue();
Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS);
Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, Ret, NewRHS);
}

if (Ret)
Ret = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);

return Ret;
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
}

static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R,
Expand Down
46 changes: 40 additions & 6 deletions llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
Expand Up @@ -154,11 +154,11 @@ entry:
define i32 @test_zext_cmp6(i32 %a, i32 %b, i32 %c) {
; FASTLZCNT-LABEL: test_zext_cmp6:
; FASTLZCNT: # %bb.0: # %entry
; FASTLZCNT-NEXT: lzcntl %edi, %eax
; FASTLZCNT-NEXT: lzcntl %esi, %ecx
; FASTLZCNT-NEXT: orl %eax, %ecx
; FASTLZCNT-NEXT: lzcntl %edi, %ecx
; FASTLZCNT-NEXT: lzcntl %edx, %eax
; FASTLZCNT-NEXT: lzcntl %esi, %esi
; FASTLZCNT-NEXT: orl %ecx, %eax
; FASTLZCNT-NEXT: orl %esi, %eax
; FASTLZCNT-NEXT: shrl $5, %eax
; FASTLZCNT-NEXT: retq
;
Expand Down Expand Up @@ -189,11 +189,11 @@ entry:
define i32 @test_zext_cmp7(i32 %a, i32 %b, i32 %c) {
; FASTLZCNT-LABEL: test_zext_cmp7:
; FASTLZCNT: # %bb.0: # %entry
; FASTLZCNT-NEXT: lzcntl %edi, %eax
; FASTLZCNT-NEXT: lzcntl %esi, %ecx
; FASTLZCNT-NEXT: orl %eax, %ecx
; FASTLZCNT-NEXT: lzcntl %edi, %ecx
; FASTLZCNT-NEXT: lzcntl %edx, %eax
; FASTLZCNT-NEXT: lzcntl %esi, %esi
; FASTLZCNT-NEXT: orl %ecx, %eax
; FASTLZCNT-NEXT: orl %esi, %eax
; FASTLZCNT-NEXT: shrl $5, %eax
; FASTLZCNT-NEXT: retq
;
Expand Down Expand Up @@ -335,3 +335,37 @@ entry:
%conv = zext i1 %0 to i32
ret i32 %conv
}

; PR54694: regression test for an infinite loop in the DAG combiner.
; Each of the two i32 arguments is masked with 131072 (0x20000) and compared
; against zero; the two compares are joined by a select-form logical OR whose
; result chooses between the constants 1 and 2.
; The FASTLZCNT checks expect the compares to be emitted as lzcnt/or/shr $5,
; while the NOFASTLZCNT checks expect the test/sete/or form.
define i32 @test_zext_cmp12(i32 %0, i32 %1) {
; FASTLZCNT-LABEL: test_zext_cmp12:
; FASTLZCNT: # %bb.0:
; FASTLZCNT-NEXT: andl $131072, %edi # imm = 0x20000
; FASTLZCNT-NEXT: andl $131072, %esi # imm = 0x20000
; FASTLZCNT-NEXT: lzcntl %edi, %eax
; FASTLZCNT-NEXT: lzcntl %esi, %ecx
; FASTLZCNT-NEXT: orl %eax, %ecx
; FASTLZCNT-NEXT: movl $2, %eax
; FASTLZCNT-NEXT: shrl $5, %ecx
; FASTLZCNT-NEXT: subl %ecx, %eax
; FASTLZCNT-NEXT: retq
;
; NOFASTLZCNT-LABEL: test_zext_cmp12:
; NOFASTLZCNT: # %bb.0:
; NOFASTLZCNT-NEXT: testl $131072, %edi # imm = 0x20000
; NOFASTLZCNT-NEXT: sete %al
; NOFASTLZCNT-NEXT: testl $131072, %esi # imm = 0x20000
; NOFASTLZCNT-NEXT: sete %cl
; NOFASTLZCNT-NEXT: orb %al, %cl
; NOFASTLZCNT-NEXT: movl $2, %eax
; NOFASTLZCNT-NEXT: movzbl %cl, %ecx
; NOFASTLZCNT-NEXT: subl %ecx, %eax
; NOFASTLZCNT-NEXT: retq
%3 = and i32 %0, 131072
%4 = icmp eq i32 %3, 0
%5 = and i32 %1, 131072
%6 = icmp eq i32 %5, 0
; Logical OR of the two compares written as a select (%4 ? true : %6).
%7 = select i1 %4, i1 true, i1 %6
%8 = select i1 %7, i32 1, i32 2
ret i32 %8
}

0 comments on commit a3d5f1c

Please sign in to comment.