diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 22d0708f547860..0ce53c30b379dd 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -471,6 +471,7 @@ class CodeGenPrepare {
   bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
                                    CmpInst *Cmp, Intrinsic::ID IID);
   bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
+  bool optimizeURem(Instruction *Rem);
   bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
   bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
   void verifyBFIUpdates(Function &F);
@@ -1974,6 +1975,151 @@ static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
   return true;
 }
 
+// Return true if `Rem` is `urem PN, RemAmt` where `PN` is a two-input header
+// PHI of the loop containing `Rem`, incremented by one with a `nuw` add, and
+// `RemAmt` is invariant in that loop. The loop must have a preheader and a
+// latch. On success `RemAmtOut` and `LoopIncrPNOut` are set to `RemAmt` and
+// `PN`; on failure they are left untouched.
+static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem,
+                                                  const LoopInfo *LI,
+                                                  Value *&RemAmtOut,
+                                                  PHINode *&LoopIncrPNOut) {
+  Value *Incr, *RemAmt;
+  // NB: If RemAmt is a power of 2 it *should* have been transformed by now.
+  if (!match(Rem, m_URem(m_Value(Incr), m_Value(RemAmt))))
+    return false;
+
+  // Only trivially analyzable loops.
+  Loop *L = LI->getLoopFor(Rem->getParent());
+  if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
+    return false;
+
+  // Find out loop increment PHI.
+  auto *PN = dyn_cast<PHINode>(Incr);
+
+  if (!PN)
+    return false;
+
+  // This isn't strictly necessary, what we really need is one increment and any
+  // amount of initial values all being the same.
+  if (PN->getNumIncomingValues() != 2)
+    return false;
+
+  // Only works if the remainder amount is a loop invariant.
+  if (!L->isLoopInvariant(RemAmt))
+    return false;
+
+  // Is the PHI a loop increment?
+  auto LoopIncrInfo = getIVIncrement(PN, LI);
+  if (!LoopIncrInfo)
+    return false;
+
+  // getIVIncrement finds the loop at PN->getParent(). This might be a different
+  // loop from the loop with Rem->getParent().
+  if (L->getHeader() != PN->getParent())
+    return false;
+
+  // We need remainder_amount % increment_amount to be zero. Increment of one
+  // satisfies that without any special logic and is overwhelmingly the common
+  // case.
+  if (!match(LoopIncrInfo->second, m_One()))
+    return false;
+
+  // Need the increment to not overflow.
+  if (!match(LoopIncrInfo->first, m_NUWAdd(m_Value(), m_Value())))
+    return false;
+
+  // Set output variables.
+  RemAmtOut = RemAmt;
+  LoopIncrPNOut = PN;
+
+  return true;
+}
+
+// Try to transform:
+//
+// for(i = Start; i < End; ++i)
+//    Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
+//
+// ->
+//
+// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
+// for(i = Start; i < End; ++i, ++rem)
+//    Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
+//
+// Currently only implemented for `IncrLoopInvariant` and `Start` being zero.
+//
+// `FreshBBs` and `IsHuge` are forwarded to `replaceAllUsesWith`; `DL` is
+// currently unused. Returns true iff `Rem` was replaced and erased.
+static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL,
+                                    const LoopInfo *LI,
+                                    SmallSet<BasicBlock *, 32> &FreshBBs,
+                                    bool IsHuge) {
+  Value *RemAmt;
+  PHINode *LoopIncrPN;
+  if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmt, LoopIncrPN))
+    return false;
+
+  // Only non-constant remainder as the extra IV is probably not profitable
+  // in that case.
+  //
+  // Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If
+  // we can rule out register pressure and ensure this `urem` is executed each
+  // iteration, its probably profitable to handle the const case as well.
+  //
+  // Potential TODO(2): Should we have a check for how "nested" this remainder
+  // operation is? The new code runs every iteration so if the remainder is
+  // guarded behind unlikely conditions this might not be worth it.
+  if (match(RemAmt, m_ImmConstant()))
+    return false;
+  Loop *L = LI->getLoopFor(Rem->getParent());
+
+  Value *Start = LoopIncrPN->getIncomingValueForBlock(L->getLoopPreheader());
+  // The new recurrence starts at `Start` and is only reset to zero when it
+  // reaches `RemAmt`, so it equals the original remainder only if
+  // `Start u% RemAmt == Start`. Zero is the one start value for which that
+  // holds for every non-zero `RemAmt`; bail out on anything else rather than
+  // emit a wrong remainder.
+  //
+  // Potential TODO(3): Handle other starts by proving `Start u< RemAmt`.
+  if (!match(Start, m_Zero()))
+    return false;
+
+  // Create new remainder with induction variable.
+  Type *Ty = Rem->getType();
+  IRBuilder<> Builder(Rem->getContext());
+
+  Builder.SetInsertPoint(LoopIncrPN);
+  PHINode *NewRem = Builder.CreatePHI(Ty, 2);
+
+  Builder.SetInsertPoint(cast<Instruction>(
+      LoopIncrPN->getIncomingValueForBlock(L->getLoopLatch())));
+  // `(add (urem x, y), 1)` is always nuw.
+  Value *RemAdd = Builder.CreateNUWAdd(NewRem, ConstantInt::get(Ty, 1));
+  Value *RemCmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, RemAdd, RemAmt);
+  Value *RemSel =
+      Builder.CreateSelect(RemCmp, Constant::getNullValue(Ty), RemAdd);
+
+  NewRem->addIncoming(Start, L->getLoopPreheader());
+  NewRem->addIncoming(RemSel, L->getLoopLatch());
+
+  // Insert all touched BBs.
+  FreshBBs.insert(LoopIncrPN->getParent());
+  FreshBBs.insert(L->getLoopLatch());
+  FreshBBs.insert(Rem->getParent());
+
+  replaceAllUsesWith(Rem, NewRem, FreshBBs, IsHuge);
+  Rem->eraseFromParent();
+  return true;
+}
+
+// Run the `urem` folds on `Rem`. Returns true iff `Rem` was transformed.
+bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
+  if (foldURemOfLoopIncrement(Rem, DL, LI, FreshBBs, IsHugeFunc))
+    return true;
+  return false;
+}
+
 bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
   if (sinkCmpExpression(Cmp, *TLI))
     return true;
@@ -8360,6 +8506,10 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
     if (optimizeCmp(Cmp, ModifiedDT))
       return true;
 
+  if (match(I, m_URem(m_Value(), m_Value())))
+    if (optimizeURem(I))
+      return true;
+
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
     LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
     bool Modified = optimizeLoadExt(LI);
diff --git a/llvm/test/CodeGen/X86/fold-loop-of-urem.ll b/llvm/test/CodeGen/X86/fold-loop-of-urem.ll
new file mode 100644
index 00000000000000..b9fe4d7c79c7fc
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fold-loop-of-urem.ll
@@ -0,0 +1,1191 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+declare void @use.i32(i32)
+declare void @use.2xi64(<2 x i64>)
+declare void @do_stuff0()
+declare void @do_stuff1() +declare i1 @get.i1() +declare i32 @get.i32() + +define void @simple_urem_to_sel(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: simple_urem_to_sel: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je .LBB0_4 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: xorl %r15d, %r15d +; CHECK-NEXT: xorl %r14d, %r14d +; CHECK-NEXT: xorl %r12d, %r12d +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %r14d, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: incl %r14d +; CHECK-NEXT: cmpl %ebx, %r14d +; CHECK-NEXT: cmovel %r15d, %r14d +; CHECK-NEXT: incl %r12d +; CHECK-NEXT: cmpl %r12d, %ebp +; CHECK-NEXT: jne .LBB0_2 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .LBB0_4: # %for.cond.cleanup +; CHECK-NEXT: retq +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_to_sel_nested2(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: simple_urem_to_sel_nested2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je .LBB1_8 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl %esi, %ebx +; 
CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: xorl %r15d, %r15d +; CHECK-NEXT: xorl %r14d, %r14d +; CHECK-NEXT: xorl %r12d, %r12d +; CHECK-NEXT: jmp .LBB1_2 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB1_5: # %for.body1 +; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: movl %r14d, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: .LBB1_6: # %for.body.tail +; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: incl %r14d +; CHECK-NEXT: cmpl %ebx, %r14d +; CHECK-NEXT: cmovel %r15d, %r14d +; CHECK-NEXT: incl %r12d +; CHECK-NEXT: cmpl %r12d, %ebp +; CHECK-NEXT: je .LBB1_7 +; CHECK-NEXT: .LBB1_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: callq get.i1@PLT +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB1_6 +; CHECK-NEXT: # %bb.3: # %for.body0 +; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: callq get.i1@PLT +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: jne .LBB1_5 +; CHECK-NEXT: # %bb.4: # %for.body2 +; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: callq get.i1@PLT +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: jne .LBB1_5 +; CHECK-NEXT: jmp .LBB1_6 +; CHECK-NEXT: .LBB1_7: +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .LBB1_8: # %for.cond.cleanup +; CHECK-NEXT: retq +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body.tail ], [ 0, %entry ] + %cond0 = call i1 @get.i1() + br i1 %cond0, label %for.body0, label %for.body.tail +for.body0: + %cond1 = call i1 @get.i1() + br i1 %cond1, label %for.body1, label %for.body2 +for.body2: + %cond2 = call i1 @get.i1() + br i1 %cond2, label %for.body1, label %for.body.tail +for.body1: + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + br label %for.body.tail +for.body.tail: + %inc = add nuw i32 %i.04, 1 + 
%exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_fail_bad_incr3(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: simple_urem_fail_bad_incr3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je .LBB2_9 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: jmp .LBB2_2 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB2_6: # %for.body1 +; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 +; CHECK-NEXT: movl %ebp, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %ebx +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: .LBB2_7: # %for.body.tail +; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 +; CHECK-NEXT: callq get.i1@PLT +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: jne .LBB2_8 +; CHECK-NEXT: .LBB2_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: callq get.i1@PLT +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB2_5 +; CHECK-NEXT: # %bb.3: # %for.body0 +; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 +; CHECK-NEXT: callq get.i1@PLT +; CHECK-NEXT: movl %eax, %r14d +; CHECK-NEXT: callq get.i32@PLT +; CHECK-NEXT: testb $1, %r14b +; CHECK-NEXT: je .LBB2_7 +; CHECK-NEXT: # %bb.4: # in Loop: Header=BB2_2 Depth=1 +; CHECK-NEXT: movl %eax, %ebp +; CHECK-NEXT: incl %ebp +; CHECK-NEXT: jmp .LBB2_6 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB2_5: # %for.body2 +; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 +; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: callq get.i1@PLT +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: jne .LBB2_6 +; CHECK-NEXT: jmp .LBB2_7 +; CHECK-NEXT: .LBB2_8: +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .LBB2_9: # %for.cond.cleanup +; CHECK-NEXT: retq +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + 
+for.cond.cleanup: + ret void + +for.body: + %cond0 = call i1 @get.i1() + br i1 %cond0, label %for.body0, label %for.body2 +for.body0: + %cond1 = call i1 @get.i1() + %val = call i32 @get.i32() + %inc = add nuw i32 %val, 1 + br i1 %cond1, label %for.body1, label %for.body.tail +for.body2: + %cond2 = call i1 @get.i1() + br i1 %cond2, label %for.body1, label %for.body.tail +for.body1: + %i.04 = phi i32 [ %inc, %for.body0], [ 0, %for.body2 ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + br label %for.body.tail +for.body.tail: + %exitcond.not = call i1 @get.i1() + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_to_sel_vec(<2 x i64> %rem_amt) nounwind { +; CHECK-LABEL: simple_urem_to_sel_vec: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $56, %rsp +; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: pxor %xmm1, %xmm1 +; CHECK-NEXT: pxor %xmm0, %xmm0 +; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB3_1: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill +; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: callq use.2xi64@PLT +; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 +; CHECK-NEXT: movdqa (%rsp), %xmm2 # 16-byte Reload +; CHECK-NEXT: psubq %xmm1, %xmm2 +; CHECK-NEXT: movdqa %xmm2, %xmm0 +; CHECK-NEXT: movdqa %xmm2, %xmm3 +; CHECK-NEXT: pcmpeqd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2] +; CHECK-NEXT: pand %xmm0, %xmm2 +; CHECK-NEXT: pandn %xmm3, %xmm2 +; CHECK-NEXT: movdqa %xmm2, (%rsp) # 16-byte Spill +; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: psubq %xmm1, %xmm0 +; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: callq get.i1@PLT +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: movdqa (%rsp), %xmm1 # 16-byte 
Reload +; CHECK-NEXT: je .LBB3_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: addq $56, %rsp +; CHECK-NEXT: retq +entry: + br label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi <2 x i64> [ %inc, %for.body ], [ zeroinitializer, %entry ] + %rem = urem <2 x i64> %i.04, %rem_amt + tail call void @use.2xi64(<2 x i64> %rem) + %inc = add nuw <2 x i64> %i.04, + %exitcond.not = call i1 @get.i1() + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_fail_bad_incr(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: simple_urem_fail_bad_incr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je .LBB4_6 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: xorl %r14d, %r14d +; CHECK-NEXT: jmp .LBB4_2 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB4_4: # %for.body.tail +; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1 +; CHECK-NEXT: movl %r14d, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %ebx +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: incl %r14d +; CHECK-NEXT: cmpl %ebp, %r14d +; CHECK-NEXT: je .LBB4_5 +; CHECK-NEXT: .LBB4_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: callq get.i1@PLT +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB4_4 +; CHECK-NEXT: # %bb.3: # %for.body0 +; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1 +; CHECK-NEXT: callq get.i32@PLT +; CHECK-NEXT: movl %eax, %r14d +; CHECK-NEXT: jmp .LBB4_4 +; CHECK-NEXT: .LBB4_5: +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .LBB4_6: # %for.cond.cleanup +; CHECK-NEXT: retq +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.03 = phi i32 [ %inc, %for.body.tail 
], [ 0, %entry ] + %cond0 = call i1 @get.i1() + br i1 %cond0, label %for.body0, label %for.body.tail +for.body0: + %some_val = call i32 @get.i32() + br label %for.body.tail + +for.body.tail: + %i.04 = phi i32 [ %i.03, %for.body ], [ %some_val, %for.body0 ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_to_sel_second_acc(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: simple_urem_to_sel_second_acc: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $2, %edi +; CHECK-NEXT: jb .LBB5_4 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: movl $1, %r15d +; CHECK-NEXT: xorl %r12d, %r12d +; CHECK-NEXT: xorl %r14d, %r14d +; CHECK-NEXT: xorl %r13d, %r13d +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB5_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %r14d, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: incl %r14d +; CHECK-NEXT: cmpl %ebx, %r14d +; CHECK-NEXT: cmovel %r12d, %r14d +; CHECK-NEXT: incl %r13d +; CHECK-NEXT: addl $2, %r15d +; CHECK-NEXT: cmpl %ebp, %r15d +; CHECK-NEXT: jbe .LBB5_2 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .LBB5_4: # %for.cond.cleanup +; CHECK-NEXT: retq +entry: + %cmp3.not = icmp ult i32 %N, 2 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %i.05 = phi i32 [ %inc2, %for.body ], [ 1, 
%entry ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %inc2 = add nuw i32 %i.05, 2 + %exitcond.not = icmp ugt i32 %inc2, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_fail_srem(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: simple_urem_fail_srem: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je .LBB6_4 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: xorl %r14d, %r14d +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB6_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %r14d, %eax +; CHECK-NEXT: cltd +; CHECK-NEXT: idivl %ebx +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: incl %r14d +; CHECK-NEXT: cmpl %r14d, %ebp +; CHECK-NEXT: jne .LBB6_2 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .LBB6_4: # %for.cond.cleanup +; CHECK-NEXT: retq +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %rem = srem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_fail_missing_nuw(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: simple_urem_fail_missing_nuw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je .LBB7_4 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: xorl %r14d, 
%r14d +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB7_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %r14d, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %ebx +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: incl %r14d +; CHECK-NEXT: cmpl %r14d, %ebp +; CHECK-NEXT: jne .LBB7_2 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .LBB7_4: # %for.cond.cleanup +; CHECK-NEXT: retq +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nsw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_fail_bad_incr2(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: simple_urem_fail_bad_incr2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je .LBB8_4 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: xorl %r14d, %r14d +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB8_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %r14d, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %ebx +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: addl $2, %r14d +; CHECK-NEXT: cmpl %r14d, %ebp +; CHECK-NEXT: jne .LBB8_2 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .LBB8_4: # %for.cond.cleanup +; CHECK-NEXT: retq +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + 
ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 2 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_non_zero_entry4(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: simple_urem_non_zero_entry4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je .LBB9_4 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: movl $4, %r14d +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB9_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %r14d, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %ebx +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: incl %r14d +; CHECK-NEXT: cmpl %r14d, %ebp +; CHECK-NEXT: jne .LBB9_2 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .LBB9_4: # %for.cond.cleanup +; CHECK-NEXT: retq +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 4, %entry ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_skip_const_rem_amt(i32 %N) nounwind { +; CHECK-LABEL: simple_urem_skip_const_rem_amt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je .LBB10_4 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl %edi, %ebx +; CHECK-NEXT: 
addl $-4, %ebx +; CHECK-NEXT: movl $4, %ebp +; CHECK-NEXT: movl $2938661835, %r14d # imm = 0xAF286BCB +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB10_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %ebp, %eax +; CHECK-NEXT: imulq %r14, %rax +; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movl %ebp, %ecx +; CHECK-NEXT: subl %eax, %ecx +; CHECK-NEXT: shrl %ecx +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: shrl $4, %ecx +; CHECK-NEXT: leal (%rcx,%rcx,8), %eax +; CHECK-NEXT: leal (%rcx,%rax,2), %eax +; CHECK-NEXT: movl %ebp, %edi +; CHECK-NEXT: subl %eax, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: incl %ebp +; CHECK-NEXT: decl %ebx +; CHECK-NEXT: jne .LBB10_2 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .LBB10_4: # %for.cond.cleanup +; CHECK-NEXT: retq +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 4, %entry ] + %rem = urem i32 %i.04, 19 + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_fail_no_preheader_non_canonical(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: simple_urem_fail_no_preheader_non_canonical: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je .LBB11_1 +; CHECK-NEXT: # %bb.2: # %for.body1 +; CHECK-NEXT: movl $1, %r14d +; CHECK-NEXT: jmp .LBB11_3 +; CHECK-NEXT: .LBB11_1: +; CHECK-NEXT: xorl %r14d, %r14d +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB11_3: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %r14d, %eax +; CHECK-NEXT: xorl %edx, %edx +; 
CHECK-NEXT: divl %ebx +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: incl %r14d +; CHECK-NEXT: cmpl %r14d, %ebp +; CHECK-NEXT: jne .LBB11_3 +; CHECK-NEXT: # %bb.4: # %for.cond.cleanup +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.body0, label %for.body1 + +for.cond.cleanup: + ret void + +for.body0: + br label %for.body + +for.body1: + br label %for.body + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 0, %for.body0 ], [ 1, %for.body1 ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_multi_latch_non_canonical(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: simple_urem_multi_latch_non_canonical: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je .LBB12_6 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: decl %ebp +; CHECK-NEXT: xorl %r12d, %r12d +; CHECK-NEXT: xorl %r14d, %r14d +; CHECK-NEXT: xorl %r13d, %r13d +; CHECK-NEXT: jmp .LBB12_2 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB12_3: # %for.body.backedge +; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1 +; CHECK-NEXT: incl %r14d +; CHECK-NEXT: cmpl %ebx, %r14d +; CHECK-NEXT: cmovel %r12d, %r14d +; CHECK-NEXT: incl %r13d +; CHECK-NEXT: .LBB12_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %r14d, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: callq get.i1@PLT +; CHECK-NEXT: movl %eax, %r15d +; CHECK-NEXT: callq do_stuff0@PLT +; CHECK-NEXT: testb $1, 
%r15b +; CHECK-NEXT: je .LBB12_3 +; CHECK-NEXT: # %bb.4: # %for.body0 +; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1 +; CHECK-NEXT: callq do_stuff1@PLT +; CHECK-NEXT: cmpl %r13d, %ebp +; CHECK-NEXT: jne .LBB12_3 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .LBB12_6: # %for.cond.cleanup +; CHECK-NEXT: retq +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ %inc, %for.body0 ], [ 0, %entry ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %cond = call i1 @get.i1() + call void @do_stuff0() + br i1 %cond, label %for.body0, label %for.body +for.body0: + call void @do_stuff1() + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_fail_bad_loop(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: simple_urem_fail_bad_loop: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: callq get.i32@PLT +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: # implicit-def: $r14d +; CHECK-NEXT: jne .LBB13_4 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: xorl %r14d, %r14d +; CHECK-NEXT: .LBB13_2: # %for.cond +; CHECK-NEXT: cmpl %ebp, %r14d +; CHECK-NEXT: jae .LBB13_5 +; CHECK-NEXT: # %bb.3: # %for.body +; CHECK-NEXT: movl %r14d, %edi +; CHECK-NEXT: xorl $1, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: .LBB13_4: # %halfway +; CHECK-NEXT: movl %r14d, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %ebx +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: incl %r14d +; CHECK-NEXT: jmp .LBB13_2 +; 
CHECK-NEXT: .LBB13_5: # %for.end +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq +entry: + %call = call i32 @get.i32() + %tobool.not = icmp eq i32 %call, 0 + br i1 %tobool.not, label %for.cond, label %halfway + +for.cond: + %i.0 = phi i32 [ %inc, %halfway ], [ 0, %entry ] + %cmp = icmp ult i32 %i.0, %N + br i1 %cmp, label %for.body, label %for.end + +for.body: + %xor = xor i32 %i.0, 1 + call void @use.i32(i32 %xor) + br label %halfway + +halfway: + %i.1 = phi i32 [ poison, %entry ], [ %i.0, %for.body ] + %rem = urem i32 %i.1, %rem_amt + call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.1, 1 + br label %for.cond + +for.end: + ret void +} + +define void @simple_urem_fail_intermediate_inc(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: simple_urem_fail_intermediate_inc: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je .LBB14_4 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %r14d +; CHECK-NEXT: negl %r14d +; CHECK-NEXT: movl $1, %r15d +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB14_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %r15d, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %ebx +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: leal 1(%r14,%r15), %eax +; CHECK-NEXT: movl %r15d, %ecx +; CHECK-NEXT: incl %ecx +; CHECK-NEXT: cmpl $1, %eax +; CHECK-NEXT: movl %ecx, %r15d +; CHECK-NEXT: jne .LBB14_2 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: .LBB14_4: # %for.cond.cleanup +; CHECK-NEXT: retq +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %inc2 = add nuw i32 
%i.04, 1 + %rem = urem i32 %inc2, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @weird_loop(i64 %sub.ptr.div.i56) personality ptr null { +; CHECK-LABEL: weird_loop: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB15_1: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp .LBB15_1 +entry: + br label %for.preheader + +for.preheader: + %i57.0540.us = phi i64 [ 0, %entry ], [ %add74.us, %for.body ] + %add74.us = add nuw i64 %i57.0540.us, 1 + br label %for.body + +for.body: + %rem.us = urem i64 %i57.0540.us, %sub.ptr.div.i56 + br i1 false, label %for.preheader, label %for.body +} + +define void @simple_urem_to_sel_non_zero_start(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: simple_urem_to_sel_non_zero_start: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $3, %edi +; CHECK-NEXT: jb .LBB16_4 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: movl $2, %r14d +; CHECK-NEXT: xorl %r15d, %r15d +; CHECK-NEXT: movl $2, %r12d +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB16_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %r14d, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: incl %r14d +; CHECK-NEXT: cmpl %ebx, %r14d +; CHECK-NEXT: cmovel %r15d, %r14d +; CHECK-NEXT: incl %r12d +; CHECK-NEXT: cmpl %r12d, %ebp +; CHECK-NEXT: jne .LBB16_2 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .LBB16_4: # %for.cond.cleanup +; CHECK-NEXT: retq +entry: + %cmp3.not = icmp ult i32 %N, 3 + br i1 %cmp3.not, label 
%for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 2, %entry ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_to_sel_non_zero_start_through_add(i32 %N, i32 %rem_amt_in) nounwind { +; CHECK-LABEL: simple_urem_to_sel_non_zero_start_through_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $3, %edi +; CHECK-NEXT: jb .LBB17_4 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %r14d +; CHECK-NEXT: orl $16, %ebx +; CHECK-NEXT: negl %r14d +; CHECK-NEXT: movl $7, %r15d +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB17_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %r15d, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %ebx +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: leal 1(%r14,%r15), %eax +; CHECK-NEXT: movl %r15d, %ecx +; CHECK-NEXT: incl %ecx +; CHECK-NEXT: cmpl $5, %eax +; CHECK-NEXT: movl %ecx, %r15d +; CHECK-NEXT: jne .LBB17_2 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: .LBB17_4: # %for.cond.cleanup +; CHECK-NEXT: retq +entry: + %rem_amt = or i32 %rem_amt_in, 16 + %cmp3.not = icmp ult i32 %N, 3 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 2, %entry ] + %i_with_off = add nuw i32 %i.04, 5 + %rem = urem i32 %i_with_off, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void 
@simple_urem_to_sel_non_zero_start_through_add_fail_missing_nuw(i32 %N, i32 %rem_amt_in) nounwind { +; CHECK-LABEL: simple_urem_to_sel_non_zero_start_through_add_fail_missing_nuw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $3, %edi +; CHECK-NEXT: jb .LBB18_4 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %r14d +; CHECK-NEXT: orl $16, %ebx +; CHECK-NEXT: negl %r14d +; CHECK-NEXT: movl $7, %r15d +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB18_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %r15d, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %ebx +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: leal 1(%r14,%r15), %eax +; CHECK-NEXT: movl %r15d, %ecx +; CHECK-NEXT: incl %ecx +; CHECK-NEXT: cmpl $5, %eax +; CHECK-NEXT: movl %ecx, %r15d +; CHECK-NEXT: jne .LBB18_2 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: .LBB18_4: # %for.cond.cleanup +; CHECK-NEXT: retq +entry: + %rem_amt = or i32 %rem_amt_in, 16 + %cmp3.not = icmp ult i32 %N, 3 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 2, %entry ] + %i_with_off = add i32 %i.04, 5 + %rem = urem i32 %i_with_off, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_to_sel_non_zero_start_through_add_fail_no_simplify_rem(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: simple_urem_to_sel_non_zero_start_through_add_fail_no_simplify_rem: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $3, %edi +; CHECK-NEXT: jb .LBB19_4 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: pushq %r15 +; 
CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %r14d +; CHECK-NEXT: negl %r14d +; CHECK-NEXT: movl $7, %r15d +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB19_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %r15d, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %ebx +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: leal 1(%r14,%r15), %eax +; CHECK-NEXT: movl %r15d, %ecx +; CHECK-NEXT: incl %ecx +; CHECK-NEXT: cmpl $5, %eax +; CHECK-NEXT: movl %ecx, %r15d +; CHECK-NEXT: jne .LBB19_2 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: .LBB19_4: # %for.cond.cleanup +; CHECK-NEXT: retq +entry: + %cmp3.not = icmp ult i32 %N, 3 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 2, %entry ] + %i_with_off = add nuw i32 %i.04, 5 + %rem = urem i32 %i_with_off, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_to_sel_non_zero_start_through_sub(i32 %N, i32 %rem_amt, i32 %start) nounwind { +; CHECK-LABEL: simple_urem_to_sel_non_zero_start_through_sub: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: subl %edx, %ebp +; CHECK-NEXT: jbe .LBB20_3 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: xorl %r15d, %r15d +; CHECK-NEXT: xorl %r14d, %r14d +; CHECK-NEXT: xorl %r12d, %r12d +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB20_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %r14d, %edi +; CHECK-NEXT: 
callq use.i32@PLT +; CHECK-NEXT: incl %r14d +; CHECK-NEXT: cmpl %ebx, %r14d +; CHECK-NEXT: cmovel %r15d, %r14d +; CHECK-NEXT: incl %r12d +; CHECK-NEXT: cmpl %r12d, %ebp +; CHECK-NEXT: jne .LBB20_2 +; CHECK-NEXT: .LBB20_3: # %for.cond.cleanup +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq +entry: + %cmp3.not = icmp ule i32 %N, %start + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ %start, %entry ] + %i_with_off = sub nuw i32 %i.04, %start + %rem = urem i32 %i_with_off, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_to_sel_non_zero_start_through_sub_no_simplfy(i32 %N, i32 %rem_amt, i32 %start) nounwind { +; CHECK-LABEL: simple_urem_to_sel_non_zero_start_through_sub_no_simplfy: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl %edx, %edi +; CHECK-NEXT: jbe .LBB21_4 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl %edx, %r15d +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movl %edi, %r14d +; CHECK-NEXT: negl %r14d +; CHECK-NEXT: addl $-2, %r15d +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB21_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl %r15d, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: divl %ebx +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: callq use.i32@PLT +; CHECK-NEXT: leal 1(%r14,%r15), %eax +; CHECK-NEXT: movl %r15d, %ecx +; CHECK-NEXT: incl %ecx +; CHECK-NEXT: cmpl $-2, %eax +; CHECK-NEXT: movl %ecx, %r15d +; CHECK-NEXT: jne .LBB21_2 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: .LBB21_4: # 
%for.cond.cleanup +; CHECK-NEXT: retq +entry: + %cmp3.not = icmp ule i32 %N, %start + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ %start, %entry ] + %i_with_off = sub nuw i32 %i.04, 2 + %rem = urem i32 %i_with_off, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll b/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll new file mode 100644 index 00000000000000..304ae337ed4197 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll @@ -0,0 +1,879 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=x86_64-unknown-unknown --loop-simplify -codegenprepare -S | FileCheck %s + +declare void @use.i32(i32) +declare void @use.2xi64(<2 x i64>) +declare void @do_stuff0() +declare void @do_stuff1() +declare i1 @get.i1() +declare i32 @get.i32() + +define void @simple_urem_to_sel(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: define void @simple_urem_to_sel( +; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[REM:%.*]] = phi i32 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[TMP3:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: tail call void @use.i32(i32 [[REM]]) +; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[REM]], 1 +; 
CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], [[REM_AMT]] +; CHECK-NEXT: [[TMP3]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_to_sel_nested2(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: define void @simple_urem_to_sel_nested2( +; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[REM:%.*]] = phi i32 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[TMP3:%.*]], %[[FOR_BODY_TAIL:.*]] ] +; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY_TAIL]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[COND0:%.*]] = call i1 @get.i1() +; CHECK-NEXT: br i1 [[COND0]], label %[[FOR_BODY0:.*]], label %[[FOR_BODY_TAIL]] +; CHECK: [[FOR_BODY0]]: +; CHECK-NEXT: [[COND1:%.*]] = call i1 @get.i1() +; CHECK-NEXT: br i1 [[COND1]], label %[[FOR_BODY1:.*]], label %[[FOR_BODY2:.*]] +; CHECK: [[FOR_BODY2]]: +; CHECK-NEXT: [[COND2:%.*]] = call i1 @get.i1() +; CHECK-NEXT: br i1 [[COND2]], label %[[FOR_BODY1]], label %[[FOR_BODY_TAIL]] +; CHECK: [[FOR_BODY1]]: +; CHECK-NEXT: tail call void @use.i32(i32 
[[REM]]) +; CHECK-NEXT: br label %[[FOR_BODY_TAIL]] +; CHECK: [[FOR_BODY_TAIL]]: +; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[REM]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], [[REM_AMT]] +; CHECK-NEXT: [[TMP3]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body.tail ], [ 0, %entry ] + %cond0 = call i1 @get.i1() + br i1 %cond0, label %for.body0, label %for.body.tail +for.body0: + %cond1 = call i1 @get.i1() + br i1 %cond1, label %for.body1, label %for.body2 +for.body2: + %cond2 = call i1 @get.i1() + br i1 %cond2, label %for.body1, label %for.body.tail +for.body1: + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + br label %for.body.tail +for.body.tail: + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_fail_bad_incr3(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: define void @simple_urem_fail_bad_incr3( +; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[COND0:%.*]] = call i1 @get.i1() +; CHECK-NEXT: br i1 [[COND0]], label %[[FOR_BODY0:.*]], label %[[FOR_BODY2:.*]] +; CHECK: [[FOR_BODY0]]: +; CHECK-NEXT: [[COND1:%.*]] = call i1 @get.i1() +; CHECK-NEXT: [[VAL:%.*]] = call i32 @get.i32() +; 
CHECK-NEXT: [[INC:%.*]] = add nuw i32 [[VAL]], 1 +; CHECK-NEXT: br i1 [[COND1]], label %[[FOR_BODY1:.*]], label %[[FOR_BODY_TAIL:.*]] +; CHECK: [[FOR_BODY2]]: +; CHECK-NEXT: [[COND2:%.*]] = call i1 @get.i1() +; CHECK-NEXT: br i1 [[COND2]], label %[[FOR_BODY1]], label %[[FOR_BODY_TAIL]] +; CHECK: [[FOR_BODY1]]: +; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC]], %[[FOR_BODY0]] ], [ 0, %[[FOR_BODY2]] ] +; CHECK-NEXT: [[REM:%.*]] = urem i32 [[I_04]], [[REM_AMT]] +; CHECK-NEXT: tail call void @use.i32(i32 [[REM]]) +; CHECK-NEXT: br label %[[FOR_BODY_TAIL]] +; CHECK: [[FOR_BODY_TAIL]]: +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = call i1 @get.i1() +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %cond0 = call i1 @get.i1() + br i1 %cond0, label %for.body0, label %for.body2 +for.body0: + %cond1 = call i1 @get.i1() + %val = call i32 @get.i32() + %inc = add nuw i32 %val, 1 + br i1 %cond1, label %for.body1, label %for.body.tail +for.body2: + %cond2 = call i1 @get.i1() + br i1 %cond2, label %for.body1, label %for.body.tail +for.body1: + %i.04 = phi i32 [ %inc, %for.body0], [ 0, %for.body2 ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + br label %for.body.tail +for.body.tail: + %exitcond.not = call i1 @get.i1() + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_to_sel_vec(<2 x i64> %rem_amt) nounwind { +; CHECK-LABEL: define void @simple_urem_to_sel_vec( +; CHECK-SAME: <2 x i64> [[REM_AMT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP:.*]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[REM:%.*]] = phi <2 x i64> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP3:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[I_04:%.*]] = phi <2 x i64> [ [[INC:%.*]], 
%[[FOR_BODY]] ], [ zeroinitializer, %[[ENTRY]] ] +; CHECK-NEXT: tail call void @use.2xi64(<2 x i64> [[REM]]) +; CHECK-NEXT: [[TMP1:%.*]] = add nuw <2 x i64> [[REM]], <i64 1, i64 1> +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], [[REM_AMT]] +; CHECK-NEXT: [[TMP3]] = select <2 x i1> [[TMP2]], <2 x i64> zeroinitializer, <2 x i64> [[TMP1]] +; CHECK-NEXT: [[INC]] = add nuw <2 x i64> [[I_04]], <i64 1, i64 1> +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = call i1 @get.i1() +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + br label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi <2 x i64> [ %inc, %for.body ], [ zeroinitializer, %entry ] + %rem = urem <2 x i64> %i.04, %rem_amt + tail call void @use.2xi64(<2 x i64> %rem) + %inc = add nuw <2 x i64> %i.04, <i64 1, i64 1> + %exitcond.not = call i1 @get.i1() + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_fail_bad_incr(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: define void @simple_urem_fail_bad_incr( +; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_03:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY_TAIL:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[COND0:%.*]] = call i1 @get.i1() +; CHECK-NEXT: br i1 [[COND0]], label %[[FOR_BODY0:.*]], label %[[FOR_BODY_TAIL]] +; CHECK: [[FOR_BODY0]]: +; CHECK-NEXT: [[SOME_VAL:%.*]] = call i32 @get.i32() +; CHECK-NEXT: br label %[[FOR_BODY_TAIL]] +; CHECK: [[FOR_BODY_TAIL]]: +; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[I_03]], %[[FOR_BODY]] ], [ [[SOME_VAL]], %[[FOR_BODY0]] ] +; CHECK-NEXT: [[REM:%.*]] = urem i32 [[I_04]], [[REM_AMT]] +; 
CHECK-NEXT: tail call void @use.i32(i32 [[REM]]) +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.03 = phi i32 [ %inc, %for.body.tail ], [ 0, %entry ] + %cond0 = call i1 @get.i1() + br i1 %cond0, label %for.body0, label %for.body.tail +for.body0: + %some_val = call i32 @get.i32() + br label %for.body.tail + +for.body.tail: + %i.04 = phi i32 [ %i.03, %for.body ], [ %some_val, %for.body0 ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_to_sel_second_acc(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: define void @simple_urem_to_sel_second_acc( +; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp ult i32 [[N]], 2 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[REM:%.*]] = phi i32 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[TMP3:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[INC2:%.*]], %[[FOR_BODY]] ], [ 1, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: tail call void @use.i32(i32 [[REM]]) +; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[REM]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], [[REM_AMT]] +; CHECK-NEXT: [[TMP3]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]] +; CHECK-NEXT: 
[[INC]] = add nuw i32 [[I_04]], 1 +; CHECK-NEXT: [[INC2]] = add nuw i32 [[I_05]], 2 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp ugt i32 [[INC2]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + %cmp3.not = icmp ult i32 %N, 2 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %i.05 = phi i32 [ %inc2, %for.body ], [ 1, %entry ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %inc2 = add nuw i32 %i.05, 2 + %exitcond.not = icmp ugt i32 %inc2, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_fail_srem(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: define void @simple_urem_fail_srem( +; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[REM:%.*]] = srem i32 [[I_04]], [[REM_AMT]] +; CHECK-NEXT: tail call void @use.i32(i32 [[REM]]) +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %rem = srem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp 
eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_fail_missing_nuw(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: define void @simple_urem_fail_missing_nuw( +; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[REM:%.*]] = urem i32 [[I_04]], [[REM_AMT]] +; CHECK-NEXT: tail call void @use.i32(i32 [[REM]]) +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_04]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nsw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_fail_bad_incr2(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: define void @simple_urem_fail_bad_incr2( +; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: 
[[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[REM:%.*]] = urem i32 [[I_04]], [[REM_AMT]] +; CHECK-NEXT: tail call void @use.i32(i32 [[REM]]) +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 2 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 2 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_non_zero_entry4(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: define void @simple_urem_non_zero_entry4( +; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[REM:%.*]] = phi i32 [ 4, %[[FOR_BODY_PREHEADER]] ], [ [[TMP3:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 4, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: tail call void @use.i32(i32 [[REM]]) +; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[REM]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], [[REM_AMT]] +; CHECK-NEXT: [[TMP3]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + 
%cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 4, %entry ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_skip_const_rem_amt(i32 %N) nounwind { +; CHECK-LABEL: define void @simple_urem_skip_const_rem_amt( +; CHECK-SAME: i32 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 4, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[REM:%.*]] = urem i32 [[I_04]], 19 +; CHECK-NEXT: tail call void @use.i32(i32 [[REM]]) +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 4, %entry ] + %rem = urem i32 %i.04, 19 + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_fail_no_preheader_non_canonical(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: define void @simple_urem_fail_no_preheader_non_canonical( +; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: 
[[CMP3_NOT:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_BODY1:.*]] +; CHECK: [[FOR_COND_CLEANUP:.*]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY1]]: +; CHECK-NEXT: br label %[[FOR_BODY_PREHEADER]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: [[I_04_PH:%.*]] = phi i32 [ 1, %[[FOR_BODY1]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[REM:%.*]] = phi i32 [ [[I_04_PH]], %[[FOR_BODY_PREHEADER]] ], [ [[TMP3:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[I_04_PH]], %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: tail call void @use.i32(i32 [[REM]]) +; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[REM]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], [[REM_AMT]] +; CHECK-NEXT: [[TMP3]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.body0, label %for.body1 + +for.cond.cleanup: + ret void + +for.body0: + br label %for.body + +for.body1: + br label %for.body + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 0, %for.body0 ], [ 1, %for.body1 ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_multi_latch_non_canonical(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: define void @simple_urem_multi_latch_non_canonical( +; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label 
%[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[INC]], %[[FOR_BODY0:.*]] ] +; CHECK-NEXT: [[REM:%.*]] = urem i32 [[I_04]], [[REM_AMT]] +; CHECK-NEXT: tail call void @use.i32(i32 [[REM]]) +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1 +; CHECK-NEXT: [[COND:%.*]] = call i1 @get.i1() +; CHECK-NEXT: call void @do_stuff0() +; CHECK-NEXT: br i1 [[COND]], label %[[FOR_BODY0]], label %[[FOR_BODY]] +; CHECK: [[FOR_BODY0]]: +; CHECK-NEXT: call void @do_stuff1() +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ %inc, %for.body0 ], [ 0, %entry ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %cond = call i1 @get.i1() + call void @do_stuff0() + br i1 %cond, label %for.body0, label %for.body +for.body0: + call void @do_stuff1() + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @simple_urem_fail_bad_loop(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: define void @simple_urem_fail_bad_loop( +; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @get.i32() +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CALL]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[FOR_COND:.*]], label %[[HALFWAY:.*]] +; CHECK: [[FOR_COND]]: +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ [[INC:%.*]], %[[HALFWAY]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], 
[[N]] +; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[I_0]], 1 +; CHECK-NEXT: call void @use.i32(i32 [[XOR]]) +; CHECK-NEXT: br label %[[HALFWAY]] +; CHECK: [[HALFWAY]]: +; CHECK-NEXT: [[I_1:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[I_0]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[REM:%.*]] = urem i32 [[I_1]], [[REM_AMT]] +; CHECK-NEXT: call void @use.i32(i32 [[REM]]) +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_1]], 1 +; CHECK-NEXT: br label %[[FOR_COND]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: ret void +; +entry: + %call = call i32 @get.i32() + %tobool.not = icmp eq i32 %call, 0 + br i1 %tobool.not, label %for.cond, label %halfway + +for.cond: + %i.0 = phi i32 [ %inc, %halfway ], [ 0, %entry ] + %cmp = icmp ult i32 %i.0, %N + br i1 %cmp, label %for.body, label %for.end + +for.body: + %xor = xor i32 %i.0, 1 + call void @use.i32(i32 %xor) + br label %halfway + +halfway: + %i.1 = phi i32 [ poison, %entry ], [ %i.0, %for.body ] + %rem = urem i32 %i.1, %rem_amt + call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.1, 1 + br label %for.cond + +for.end: + ret void +} + +define void @simple_urem_fail_intermediate_inc(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: define void @simple_urem_fail_intermediate_inc( +; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[INC2:%.*]] = add nuw i32 [[I_04]], 1 +; CHECK-NEXT: [[REM:%.*]] = urem i32 [[INC2]], [[REM_AMT]] +; CHECK-NEXT: tail call void @use.i32(i32 [[REM]]) +; CHECK-NEXT: [[INC]] = 
add nuw i32 [[I_04]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + %cmp3.not = icmp eq i32 %N, 0 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %inc2 = add nuw i32 %i.04, 1 + %rem = urem i32 %inc2, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define void @weird_loop(i64 %sub.ptr.div.i56) personality ptr null { +; CHECK-LABEL: define void @weird_loop( +; CHECK-SAME: i64 [[SUB_PTR_DIV_I56:%.*]]) personality ptr null { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ADD74_US:%.*]] = add nuw i64 0, 1 +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[REM_US:%.*]] = urem i64 0, [[SUB_PTR_DIV_I56]] +; CHECK-NEXT: br label %[[FOR_BODY]] +; +entry: + br label %for.preheader + +for.preheader: + %i57.0540.us = phi i64 [ 0, %entry ], [ %add74.us, %for.body ] + %add74.us = add nuw i64 %i57.0540.us, 1 + br label %for.body + +for.body: + %rem.us = urem i64 %i57.0540.us, %sub.ptr.div.i56 + br i1 false, label %for.preheader, label %for.body +} + +define void @simple_urem_to_sel_non_zero_start(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: define void @simple_urem_to_sel_non_zero_start( +; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp ult i32 [[N]], 3 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[REM:%.*]] = phi i32 [ 2, %[[FOR_BODY_PREHEADER]] ], [ [[TMP3:%.*]], %[[FOR_BODY]] ] 
+; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 2, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: tail call void @use.i32(i32 [[REM]]) +; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[REM]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], [[REM_AMT]] +; CHECK-NEXT: [[TMP3]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + %cmp3.not = icmp ult i32 %N, 3 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 2, %entry ] + %rem = urem i32 %i.04, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} +; The urem operand is the loop phi plus 5 (add nuw) and %rem_amt is %rem_amt_in or'ed with 16; the expected output keeps the urem. +define void @simple_urem_to_sel_non_zero_start_through_add(i32 %N, i32 %rem_amt_in) nounwind { +; CHECK-LABEL: define void @simple_urem_to_sel_non_zero_start_through_add( +; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT_IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[REM_AMT:%.*]] = or i32 [[REM_AMT_IN]], 16 +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp ult i32 [[N]], 3 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 2, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[I_WITH_OFF:%.*]] = add nuw i32 [[I_04]], 5 +; CHECK-NEXT: [[REM:%.*]] = urem i32 [[I_WITH_OFF]], [[REM_AMT]] +; CHECK-NEXT: tail call void @use.i32(i32 [[REM]]) +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 
[[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + %rem_amt = or i32 %rem_amt_in, 16 + %cmp3.not = icmp ult i32 %N, 3 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 2, %entry ] + %i_with_off = add nuw i32 %i.04, 5 + %rem = urem i32 %i_with_off, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} +; The urem operand is the loop phi plus 5 computed by an add without the nuw flag, with %rem_amt being %rem_amt_in or'ed with 16; the expected output keeps the urem. +define void @simple_urem_to_sel_non_zero_start_through_add_fail_missing_nuw(i32 %N, i32 %rem_amt_in) nounwind { +; CHECK-LABEL: define void @simple_urem_to_sel_non_zero_start_through_add_fail_missing_nuw( +; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT_IN:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[REM_AMT:%.*]] = or i32 [[REM_AMT_IN]], 16 +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp ult i32 [[N]], 3 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 2, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[I_WITH_OFF:%.*]] = add i32 [[I_04]], 5 +; CHECK-NEXT: [[REM:%.*]] = urem i32 [[I_WITH_OFF]], [[REM_AMT]] +; CHECK-NEXT: tail call void @use.i32(i32 [[REM]]) +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + %rem_amt = or i32 %rem_amt_in, 16 + %cmp3.not = icmp ult i32 %N, 3 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 2, %entry ] + %i_with_off = add i32 %i.04, 5 + %rem = urem i32 
%i_with_off, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} +; The urem operand is the loop phi plus 5 (add nuw) and %rem_amt is an unmodified function argument; the expected output keeps the urem. +define void @simple_urem_to_sel_non_zero_start_through_add_fail_no_simplify_rem(i32 %N, i32 %rem_amt) nounwind { +; CHECK-LABEL: define void @simple_urem_to_sel_non_zero_start_through_add_fail_no_simplify_rem( +; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp ult i32 [[N]], 3 +; CHECK-NEXT: br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 2, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[I_WITH_OFF:%.*]] = add nuw i32 [[I_04]], 5 +; CHECK-NEXT: [[REM:%.*]] = urem i32 [[I_WITH_OFF]], [[REM_AMT]] +; CHECK-NEXT: tail call void @use.i32(i32 [[REM]]) +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + %cmp3.not = icmp ult i32 %N, 3 + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 2, %entry ] + %i_with_off = add nuw i32 %i.04, 5 + %rem = urem i32 %i_with_off, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} +; The urem operand is the loop phi minus %start (sub nuw), where %start is also the phi's initial value; the expected output keeps the urem. +define void @simple_urem_to_sel_non_zero_start_through_sub(i32 %N, i32 %rem_amt, i32 %start) nounwind { +; CHECK-LABEL: define void @simple_urem_to_sel_non_zero_start_through_sub( +; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]], i32 [[START:%.*]]) 
#[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp ule i32 [[N]], [[START]] +; CHECK-NEXT: br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[START]], %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[I_WITH_OFF:%.*]] = sub nuw i32 [[I_04]], [[START]] +; CHECK-NEXT: [[REM:%.*]] = urem i32 [[I_WITH_OFF]], [[REM_AMT]] +; CHECK-NEXT: tail call void @use.i32(i32 [[REM]]) +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + %cmp3.not = icmp ule i32 %N, %start + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ %start, %entry ] + %i_with_off = sub nuw i32 %i.04, %start + %rem = urem i32 %i_with_off, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} +; The urem operand is the loop phi minus the constant 2 (sub nuw) while the phi starts at %start; the expected output keeps the urem. +define void @simple_urem_to_sel_non_zero_start_through_sub_no_simplfy(i32 %N, i32 %rem_amt, i32 %start) nounwind { +; CHECK-LABEL: define void @simple_urem_to_sel_non_zero_start_through_sub_no_simplfy( +; CHECK-SAME: i32 [[N:%.*]], i32 [[REM_AMT:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp ule i32 [[N]], [[START]] +; CHECK-NEXT: br i1 [[CMP3_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ 
[[INC:%.*]], %[[FOR_BODY]] ], [ [[START]], %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[I_WITH_OFF:%.*]] = sub nuw i32 [[I_04]], 2 +; CHECK-NEXT: [[REM:%.*]] = urem i32 [[I_WITH_OFF]], [[REM_AMT]] +; CHECK-NEXT: tail call void @use.i32(i32 [[REM]]) +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; +entry: + %cmp3.not = icmp ule i32 %N, %start + br i1 %cmp3.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ %start, %entry ] + %i_with_off = sub nuw i32 %i.04, 2 + %rem = urem i32 %i_with_off, %rem_amt + tail call void @use.i32(i32 %rem) + %inc = add nuw i32 %i.04, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +}