diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index 943bd18c6c8b0a..54fc6ee45d00dd 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -70,12 +70,6 @@ static cl::opt TailDupIndirectBranchSize( "end with indirect branches."), cl::init(20), cl::Hidden); -static cl::opt TailDupJmpTableLoopSize( - "tail-dup-jmptable-loop-size", - cl::desc("Maximum loop latches to consider tail duplication that are " - "successors of loop header."), - cl::init(128), cl::Hidden); - static cl::opt TailDupVerify("tail-dup-verify", cl::desc("Verify sanity of PHI instructions during taildup"), @@ -569,29 +563,6 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (TailBB.isSuccessor(&TailBB)) return false; - // When doing tail-duplication with jumptable loops like: - // 1 -> 2 <-> 3 | - // \ <-> 4 | - // \ <-> 5 | - // \ <-> ... | - // \---> rest | - // quadratic number of edges and much more loops are added to CFG. This - // may cause compile time regression when jumptable is quiet large. - // So set the limit on jumptable cases. - auto isLargeJumpTableLoop = [](const MachineBasicBlock &TailBB) { - const SmallPtrSet Preds(TailBB.pred_begin(), - TailBB.pred_end()); - // Check the basic block has large number of successors, all of them only - // have one successor which is the basic block itself. - return llvm::count_if( - TailBB.successors(), [&](const MachineBasicBlock *SuccBB) { - return Preds.count(SuccBB) && SuccBB->succ_size() == 1; - }) > TailDupJmpTableLoopSize; - }; - - if (isLargeJumpTableLoop(TailBB)) - return false; - // Set the limit on the cost to duplicate. When optimizing for size, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. diff --git a/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll b/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll index 25da377ec487b5..2032c7244331c1 100644 --- a/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll +++ b/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll @@ -1,48 +1,76 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -tail-dup-jmptable-loop-size=5 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s define i8* @large_loop_switch(i8* %p) { ; CHECK-LABEL: large_loop_switch: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset %rbx, -16 -; CHECK-NEXT: movq %rdi, %rsi +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: movl $6, %ebx -; CHECK-NEXT: movl %ebx, %eax -; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8) +; CHECK-NEXT: movl %ebx, %ecx +; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) +; CHECK-NEXT: .LBB0_1: # %for.cond.cleanup +; CHECK-NEXT: movl $530, %edi # imm = 0x212 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: jmp ccc@PLT # TAILCALL +; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_2: # %sw.bb1 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: movl $531, %edi # imm = 0x213 -; CHECK-NEXT: .LBB0_3: # %for.body -; CHECK-NEXT: callq ccc@PLT -; CHECK-NEXT: .LBB0_4: # %for.body ; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: callq ccc@PLT ; CHECK-NEXT: decl %ebx -; CHECK-NEXT: movl %ebx, %eax -; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8) -; CHECK-NEXT: .LBB0_5: # %sw.bb3 +; CHECK-NEXT: movl %ebx, %ecx +; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_3: # %sw.bb3 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movl $532, %edi # imm = 0x214 +; CHECK-NEXT: movq %rax, %rsi ; CHECK-NEXT: callq bbb@PLT -; CHECK-NEXT: jmp .LBB0_4 -; CHECK-NEXT: .LBB0_7: # %sw.bb5 +; CHECK-NEXT: decl %ebx +; CHECK-NEXT: movl %ebx, %ecx +; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_4: # %sw.bb5 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movl $533, %edi # imm = 0x215 +; CHECK-NEXT: movq %rax, %rsi ; CHECK-NEXT: callq bbb@PLT -; CHECK-NEXT: jmp .LBB0_4 -; CHECK-NEXT: .LBB0_8: # %sw.bb7 +; CHECK-NEXT: decl %ebx +; CHECK-NEXT: movl %ebx, %ecx +; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_5: # %sw.bb7 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movl $535, %edi # imm = 0x217 +; CHECK-NEXT: movq %rax, %rsi ; CHECK-NEXT: callq bbb@PLT -; CHECK-NEXT: jmp .LBB0_4 -; CHECK-NEXT: .LBB0_9: # %sw.bb9 +; CHECK-NEXT: decl %ebx +; CHECK-NEXT: movl %ebx, %ecx +; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_6: # %sw.bb9 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movl $536, %edi # imm = 0x218 -; CHECK-NEXT: jmp .LBB0_3 -; CHECK-NEXT: .LBB0_10: # %sw.bb11 +; CHECK-NEXT: movq %rax, %rsi +; CHECK-NEXT: callq ccc@PLT +; CHECK-NEXT: decl %ebx +; CHECK-NEXT: movl %ebx, %ecx +; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_7: # %sw.bb11 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movl $658, %edi # imm = 0x292 +; CHECK-NEXT: movq %rax, %rsi ; CHECK-NEXT: callq bbb@PLT -; CHECK-NEXT: jmp .LBB0_4 -; CHECK-NEXT: .LBB0_11: # %for.cond.cleanup -; CHECK-NEXT: movl $530, %edi # imm = 0x212 -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: jmp ccc@PLT # TAILCALL +; CHECK-NEXT: decl %ebx +; CHECK-NEXT: movl %ebx, %ecx +; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) entry: br label %for.body