Skip to content

Commit

Permalink
[SimplifyCFG] Ignore ephemeral values when counting insts for threading
Browse files Browse the repository at this point in the history
Ignore ephemeral values (only feeding llvm.assume intrinsics) when
computing the instruction count to decide if a block is small enough for
threading. This is similar to the handling of these values in the
InlineCost computation. These instructions will eventually be removed
and shouldn't count against code size (similar to the existing ignoring
of phis).

Without this change, when enabling -fwhole-program-vtables, which causes
type test / assume sequences to be inserted by clang, we can get
different threading decisions. In particular, when building with
instrumentation FDO it can affect the optimizations decisions before FDO
matching, leading to some mismatches.

Differential Revision: https://reviews.llvm.org/D101494
  • Loading branch information
teresajohnson committed May 10, 2021
1 parent 9ffd492 commit 220f6e5
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 11 deletions.
32 changes: 23 additions & 9 deletions llvm/lib/Transforms/Utils/SimplifyCFG.cpp
Expand Up @@ -151,9 +151,10 @@ static cl::opt<unsigned> MaxSpeculationDepth(
"speculatively executed instructions"));

static cl::opt<int>
MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10),
cl::desc("Max size of a block which is still considered "
"small enough to thread through"));
MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
cl::init(10),
cl::desc("Max size of a block which is still considered "
"small enough to thread through"));

// Two is chosen to allow one negation and a logical combine.
static cl::opt<unsigned>
Expand Down Expand Up @@ -2527,19 +2528,32 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
int Size = 0;

for (Instruction &I : BB->instructionsWithoutDebug()) {
if (Size > MaxSmallBlockSize)
return false; // Don't clone large BB's.
SmallPtrSet<const Value *, 32> EphValues;
auto IsEphemeral = [&](const Value *V) {
if (isa<AssumeInst>(V))
return true;
return isSafeToSpeculativelyExecute(V) &&
all_of(V->users(),
[&](const User *U) { return EphValues.count(U); });
};

// Walk the loop in reverse so that we can identify ephemeral values properly
// (values only feeding assumes).
for (Instruction &I : reverse(BB->instructionsWithoutDebug())) {
// Can't fold blocks that contain noduplicate or convergent calls.
if (CallInst *CI = dyn_cast<CallInst>(&I))
if (CI->cannotDuplicate() || CI->isConvergent())
return false;

// Ignore ephemeral values which are deleted during codegen.
if (IsEphemeral(&I))
EphValues.insert(&I);
// We will delete Phis while threading, so Phis should not be accounted in
// block's size
if (!isa<PHINode>(I))
++Size;
// block's size.
else if (!isa<PHINode>(I)) {
if (Size++ > MaxSmallBlockSize)
return false; // Don't clone large BB's.
}

// We can only support instructions that do not define values that are
// live outside of the current basic block.
Expand Down
63 changes: 61 additions & 2 deletions llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll
@@ -1,10 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -simplifycfg-max-small-block-size=10 -S < %s | FileCheck %s
; RUN: opt -passes=simplify-cfg -simplifycfg-max-small-block-size=10 -S < %s | FileCheck %s
; RUN: opt -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -simplifycfg-max-small-block-size=6 -S < %s | FileCheck %s
; RUN: opt -passes=simplify-cfg -simplifycfg-max-small-block-size=6 -S < %s | FileCheck %s

target datalayout = "e-p:64:64-p5:32:32-A5"

declare void @llvm.assume(i1)
declare i1 @llvm.type.test(i8*, metadata) nounwind readnone

define void @test_01(i1 %c, i64* align 1 %ptr) local_unnamed_addr #0 {
; CHECK-LABEL: @test_01(
Expand Down Expand Up @@ -165,3 +166,61 @@ false2: ; preds = %true1
store volatile i64 3, i64* %ptr, align 8
ret void
}

; Try the max block size for PRE again but with the bitcast/type test/assume
; sequence used for whole program devirt.
define void @test_04(i1 %c, i64* align 1 %ptr, [3 x i8*]* %vtable) local_unnamed_addr #0 {
; CHECK-LABEL: @test_04(
; CHECK-NEXT: br i1 [[C:%.*]], label [[TRUE2_CRITEDGE:%.*]], label [[FALSE1:%.*]]
; CHECK: false1:
; CHECK-NEXT: store volatile i64 1, i64* [[PTR:%.*]], align 4
; CHECK-NEXT: [[VTABLE:%.*]] = bitcast [3 x i8*]* %vtable to i8*
; CHECK-NEXT: [[P:%.*]] = call i1 @llvm.type.test(i8* [[VTABLE]], metadata !"foo")
; CHECK-NEXT: tail call void @llvm.assume(i1 [[P]])
; CHECK-NEXT: store volatile i64 0, i64* [[PTR]], align 8
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
; CHECK-NEXT: store volatile i64 3, i64* [[PTR]], align 8
; CHECK-NEXT: ret void
; CHECK: true2.critedge:
; CHECK-NEXT: [[VTABLE:%.*]] = bitcast [3 x i8*]* %vtable to i8*
; CHECK-NEXT: [[P:%.*]] = call i1 @llvm.type.test(i8* [[VTABLE]], metadata !"foo")
; CHECK-NEXT: tail call void @llvm.assume(i1 [[P]])
; CHECK-NEXT: store volatile i64 0, i64* [[PTR]], align 8
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
; CHECK-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
; CHECK-NEXT: store volatile i64 2, i64* [[PTR]], align 8
; CHECK-NEXT: ret void
;
br i1 %c, label %true1, label %false1

true1: ; preds = %false1, %0
%vtablei8 = bitcast [3 x i8*]* %vtable to i8*
%p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"foo")
tail call void @llvm.assume(i1 %p)
store volatile i64 0, i64* %ptr, align 8
store volatile i64 -1, i64* %ptr, align 8
store volatile i64 -1, i64* %ptr, align 8
store volatile i64 -1, i64* %ptr, align 8
store volatile i64 -1, i64* %ptr, align 8
store volatile i64 -1, i64* %ptr, align 8
br i1 %c, label %true2, label %false2

false1: ; preds = %0
store volatile i64 1, i64* %ptr, align 4
br label %true1

true2: ; preds = %true1
store volatile i64 2, i64* %ptr, align 8
ret void

false2: ; preds = %true1
store volatile i64 3, i64* %ptr, align 8
ret void
}

0 comments on commit 220f6e5

Please sign in to comment.