Skip to content

Commit

Permalink
[CodeGenPrepare] Delete intrinsic call to llvm.assume to enable more …
Browse files Browse the repository at this point in the history
…tailcall

The attached test case is simplified from tcmalloc. Both function calls should be optimized as tail calls, but LLVM can only optimize the first one. The second call can't be optimized because the function dupRetToEnableTailCallOpts fails to duplicate the ret into block case2.

Two problems block the duplication:

  1 The intrinsic call to llvm.assume is not handled by dupRetToEnableTailCallOpts.
  2 The control flow is more complex than expected: dupRetToEnableTailCallOpts can only duplicate a ret into its predecessor, but here there is an intermediate block between the call and the ret.

The solutions:

  1 Since CodeGenPrepare is already at the end of the LLVM IR phase, we can simply delete the intrinsic call to llvm.assume.
  2 A general solution to the complex control flow is hard, but for this case, after exit2 is duplicated into case1, exit2 is the only successor of exit1 and exit1 is the only predecessor of exit2, so they can be combined through eliminateFallThrough. However, that function is currently called too late: dupRetToEnableTailCallOpts is not run again after it. We can add an earlier call to eliminateFallThrough to solve this.

Differential Revision: https://reviews.llvm.org/D76539
  • Loading branch information
weiguozhi committed Mar 31, 2020
1 parent 08682dc commit 6d20937
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 4 deletions.
8 changes: 8 additions & 0 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
Expand Up @@ -495,6 +495,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (!LargeOffsetGEPMap.empty())
MadeChange |= splitLargeGEPOffsets();

if (MadeChange)
eliminateFallThrough(F);

// Really free removed instructions during promotion.
for (Instruction *I : RemovedInsts)
I->deleteValue();
Expand Down Expand Up @@ -1964,6 +1967,11 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
if (II) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::assume: {
II->eraseFromParent();
return true;
}

case Intrinsic::experimental_widenable_condition: {
// Give up on future widening opportunities so that we can fold away dead
// paths and merge blocks before going into block-local instruction
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll
Expand Up @@ -13,7 +13,6 @@ block1:
%s1 = sext i64 %l1 to i128
br label %block2

; CHECK-LABEL: block2:
; CHECK-NEXT: sext
; CHECK-NEXT: load
; CHECK-NEXT: sext
Expand All @@ -34,7 +33,6 @@ block1:
%l1 = load i32, i32* %mem1
br label %block2

; CHECK-LABEL: block2:
; CHECK-NEXT: load
; CHECK-NEXT: sext
block2:
Expand Down
5 changes: 3 additions & 2 deletions llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll
Expand Up @@ -15,9 +15,10 @@ define i1 @PR41004(i32 %x, i32 %y, i32 %t1) {
; CHECK-NEXT: br label [[SELECT_END]]
; CHECK: select.end:
; CHECK-NEXT: [[MUL:%.*]] = phi i32 [ [[REM]], [[SELECT_TRUE_SINK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[NEG:%.*]] = add i32 [[T1:%.*]], -1
; CHECK-NEXT: [[USUB:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[T1:%.*]], i32 1)
; CHECK-NEXT: [[NEG:%.*]] = extractvalue { i32, i1 } [[USUB]], 0
; CHECK-NEXT: [[TOBOOL:%.*]] = extractvalue { i32, i1 } [[USUB]], 1
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[NEG]], [[MUL]]
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[T1]], 0
; CHECK-NEXT: ret i1 [[TOBOOL]]
;
entry:
Expand Down
48 changes: 48 additions & 0 deletions llvm/test/Transforms/CodeGenPrepare/X86/tailcall-assume-xbb.ll
@@ -0,0 +1,48 @@
; RUN: opt -codegenprepare -S -mtriple=x86_64-linux < %s | FileCheck %s

; The ret instruction can be duplicated into BB case2 even though there is an
; intermediate BB exit1 and call to llvm.assume.

@ptr = external global i8*, align 8

; CHECK: %ret1 = tail call i8* @qux()
; CHECK-NEXT: ret i8* %ret1

; CHECK: %ret2 = tail call i8* @bar()
; CHECK-NEXT: ret i8* %ret2

; Returns an i8* produced on one of three paths: a tail call to @qux (case1),
; a tail call to @bar (case2), or a load from @ptr (case3). The case2 and
; case3 values merge in exit1, which then branches to the shared return block
; exit2. The FileCheck patterns above expect each tail call to be immediately
; followed by its own ret in the output.
define i8* @foo(i64 %size, i64 %v1, i64 %v2) {
entry:
%cmp1 = icmp ult i64 %size, 1025
br i1 %cmp1, label %if.end, label %case1

; This call's block branches straight to the return block exit2.
case1:
%ret1 = tail call i8* @qux()
br label %exit2

if.end:
%cmp2 = icmp ult i64 %v1, %v2
br i1 %cmp2, label %case3, label %case2

; This call reaches the ret only through the intermediate block exit1.
case2:
%ret2 = tail call i8* @bar()
br label %exit1

; Non-call path: the returned value is loaded from the global @ptr.
case3:
%ret3 = load i8*, i8** @ptr, align 8
br label %exit1

; Intermediate block between case2 and the ret; besides the phi it only holds
; the non-null comparison feeding llvm.assume.
exit1:
%retval1 = phi i8* [ %ret2, %case2 ], [ %ret3, %case3 ]
%cmp3 = icmp ne i8* %retval1, null
tail call void @llvm.assume(i1 %cmp3)
br label %exit2

; Shared return block: selects between the case1 result and exit1's merged value.
exit2:
%retval2 = phi i8* [ %ret1, %case1 ], [ %retval1, %exit1 ]
ret i8* %retval2
}

declare void @llvm.assume(i1)
declare i8* @qux()
declare i8* @bar()

0 comments on commit 6d20937

Please sign in to comment.