diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index 66e45ecbde7df..e84ca819b93d8 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -122,16 +122,22 @@ static cl::opt cl::desc("Maximum cost accepted for the transformation"), cl::Hidden, cl::init(50)); -extern cl::opt ProfcheckDisableMetadataFixes; - -} // namespace llvm - static cl::opt MaxClonedRate( "dfa-max-cloned-rate", cl::desc( "Maximum cloned instructions rate accepted for the transformation"), cl::Hidden, cl::init(7.5)); +static cl::opt + MaxOuterUseBlocks("dfa-max-out-use-blocks", + cl::desc("Maximum unduplicated blocks with outer uses " + "accepted for the transformation"), + cl::Hidden, cl::init(40)); + +extern cl::opt ProfcheckDisableMetadataFixes; + +} // namespace llvm + namespace { class SelectInstToUnfold { SelectInst *SI; @@ -965,8 +971,16 @@ struct TransformDFA { // SLPVectorizer. // TODO: Thread the switch partially before reaching the threshold. uint64_t NumOrigInst = 0; - for (auto *BB : DuplicateMap.keys()) + uint64_t NumOuterUseBlock = 0; + for (auto *BB : DuplicateMap.keys()) { NumOrigInst += BB->sizeWithoutDebug(); + // Only unduplicated blocks with a single predecessor require new phi + // nodes. + for (auto *Succ : successors(BB)) + if (!DuplicateMap.count(Succ) && Succ->getSinglePredecessor()) + NumOuterUseBlock++; + } + if (double(NumClonedInst) / double(NumOrigInst) > MaxClonedRate) { LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much " "instructions wll be cloned\n"); @@ -977,6 +991,20 @@ struct TransformDFA { return false; } + // Too many unduplicated blocks with outer uses may cause too many + // insertions of phi nodes for duplicated definitions. TODO: Drop this + // threshold if we come up with another way to reduce the number of inserted + // phi nodes. 
+ if (NumOuterUseBlock > MaxOuterUseBlocks) { + LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much " + "blocks with outer uses\n"); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch) + << "Too much blocks with outer uses."; + }); + return false; + } + InstructionCost DuplicationCost = 0; unsigned JumpTableSize = 0; diff --git a/llvm/test/Transforms/DFAJumpThreading/max-outer-uses.ll b/llvm/test/Transforms/DFAJumpThreading/max-outer-uses.ll new file mode 100644 index 0000000000000..dfcc5b1a5c3fe --- /dev/null +++ b/llvm/test/Transforms/DFAJumpThreading/max-outer-uses.ll @@ -0,0 +1,326 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes=dfa-jump-threading -dfa-max-out-use-blocks=5 %s | FileCheck %s + +declare void @use(i32) + +define void @max_outer_uses_by_switch(i32 %cond, ptr %p) { +; CHECK-LABEL: define void @max_outer_uses_by_switch( +; CHECK-SAME: i32 [[COND:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[SWITCH_BB:.*]] +; CHECK: [[SWITCH_BB]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DETERMINE:%.*]], %[[SUB_SWITCH_BB:.*]] ], [ 2, %[[CASE2:.*]] ] +; CHECK-NEXT: switch i32 [[PHI]], label %[[DEFAULT_DEST:.*]] [ +; CHECK-NEXT: i32 0, label %[[CASE1:.*]] +; CHECK-NEXT: i32 1, label %[[CASE2]] +; CHECK-NEXT: i32 2, label %[[CASE3:.*]] +; CHECK-NEXT: ] +; CHECK: [[CASE1]]: +; CHECK-NEXT: br label %[[SUB_SWITCH_BB]] +; CHECK: [[CASE3]]: +; CHECK-NEXT: br label %[[SUB_SWITCH_BB]] +; CHECK: [[SUB_SWITCH_BB]]: +; CHECK-NEXT: [[DETERMINE]] = phi i32 [ 1, %[[CASE1]] ], [ 3, %[[CASE3]] ] +; CHECK-NEXT: [[DEF:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: switch i32 [[COND]], label %[[SWITCH_BB]] [ +; CHECK-NEXT: i32 0, label %[[OUTER1:.*]] +; CHECK-NEXT: i32 1, label %[[OUTER2:.*]] +; CHECK-NEXT: i32 2, label %[[OUTER3:.*]] +; CHECK-NEXT: i32 3, label %[[OUTER4:.*]] +; CHECK-NEXT: ] +; 
CHECK: [[CASE2]]: +; CHECK-NEXT: br label %[[SWITCH_BB]] +; CHECK: [[OUTER1]]: +; CHECK-NEXT: call void @use(i32 [[DEF]]) +; CHECK-NEXT: ret void +; CHECK: [[OUTER2]]: +; CHECK-NEXT: call void @use(i32 [[DEF]]) +; CHECK-NEXT: ret void +; CHECK: [[OUTER3]]: +; CHECK-NEXT: call void @use(i32 [[DEF]]) +; CHECK-NEXT: ret void +; CHECK: [[OUTER4]]: +; CHECK-NEXT: call void @use(i32 [[DEF]]) +; CHECK-NEXT: ret void +; CHECK: [[DEFAULT_DEST]]: +; CHECK-NEXT: ret void +; +entry: + br label %switch_bb + +switch_bb: + %phi = phi i32 [ 0, %entry ], [ %determine, %sub_switch_bb ], [ 2, %case2 ] + switch i32 %phi, label %default_dest [ + i32 0, label %case1 + i32 1, label %case2 + i32 2, label %case3 + ] + +case1: + br label %sub_switch_bb + +case3: + br label %sub_switch_bb + +sub_switch_bb: + %determine = phi i32 [ 1, %case1 ], [ 3, %case3 ] + %def = load i32, ptr %p + switch i32 %cond, label %switch_bb [ + i32 0, label %outer1 + i32 1, label %outer2 + i32 2, label %outer3 + i32 3, label %outer4 + ] + +case2: + br label %switch_bb + +outer1: + call void @use(i32 %def) + ret void + +outer2: + call void @use(i32 %def) + ret void + +outer3: + call void @use(i32 %def) + ret void + +outer4: + call void @use(i32 %def) + ret void + +default_dest: + ret void +} + +define void @less_outer_uses_by_switch(i32 %cond, ptr %p) { +; CHECK-LABEL: define void @less_outer_uses_by_switch( +; CHECK-SAME: i32 [[COND:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[SWITCH_BB:.*]] +; CHECK: [[SWITCH_BB]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ poison, %[[SUB_SWITCH_BB:.*]] ] +; CHECK-NEXT: switch i32 [[PHI]], label %[[DEFAULT_DEST:.*]] [ +; CHECK-NEXT: i32 0, label %[[CASE1:.*]] +; CHECK-NEXT: i32 1, label %[[CASE2:.*]] +; CHECK-NEXT: i32 2, label %[[CASE3:.*]] +; CHECK-NEXT: ] +; CHECK: [[SWITCH_BB_JT2:.*]]: +; CHECK-NEXT: [[PHI_JT2:%.*]] = phi i32 [ 2, %[[CASE2]] ] +; CHECK-NEXT: br label %[[CASE3]] +; CHECK: [[SWITCH_BB_JT3:.*]]: +; CHECK-NEXT: 
[[PHI_JT3:%.*]] = phi i32 [ [[DETERMINE_JT3:%.*]], %[[SUB_SWITCH_BB_JT3:.*]] ] +; CHECK-NEXT: br label %[[DEFAULT_DEST]] +; CHECK: [[SWITCH_BB_JT1:.*]]: +; CHECK-NEXT: [[PHI_JT1:%.*]] = phi i32 [ [[DETERMINE_JT1:%.*]], %[[SUB_SWITCH_BB_JT1:.*]] ] +; CHECK-NEXT: br label %[[CASE2]] +; CHECK: [[CASE1]]: +; CHECK-NEXT: br label %[[SUB_SWITCH_BB_JT1]] +; CHECK: [[CASE3]]: +; CHECK-NEXT: br label %[[SUB_SWITCH_BB_JT3]] +; CHECK: [[SUB_SWITCH_BB]]: +; CHECK-NEXT: [[DEF:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: switch i32 [[COND]], label %[[SWITCH_BB]] [ +; CHECK-NEXT: i32 0, label %[[OUTER1:.*]] +; CHECK-NEXT: ] +; CHECK: [[SUB_SWITCH_BB_JT3]]: +; CHECK-NEXT: [[DETERMINE_JT3]] = phi i32 [ 3, %[[CASE3]] ] +; CHECK-NEXT: [[DEF_JT3:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: switch i32 [[COND]], label %[[SWITCH_BB_JT3]] [ +; CHECK-NEXT: i32 0, label %[[OUTER1]] +; CHECK-NEXT: ] +; CHECK: [[SUB_SWITCH_BB_JT1]]: +; CHECK-NEXT: [[DETERMINE_JT1]] = phi i32 [ 1, %[[CASE1]] ] +; CHECK-NEXT: [[DEF_JT1:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: switch i32 [[COND]], label %[[SWITCH_BB_JT1]] [ +; CHECK-NEXT: i32 0, label %[[OUTER1]] +; CHECK-NEXT: ] +; CHECK: [[CASE2]]: +; CHECK-NEXT: br label %[[SWITCH_BB_JT2]] +; CHECK: [[OUTER1]]: +; CHECK-NEXT: [[DEF1:%.*]] = phi i32 [ [[DEF_JT3]], %[[SUB_SWITCH_BB_JT3]] ], [ [[DEF_JT1]], %[[SUB_SWITCH_BB_JT1]] ], [ [[DEF]], %[[SUB_SWITCH_BB]] ] +; CHECK-NEXT: call void @use(i32 [[DEF1]]) +; CHECK-NEXT: ret void +; CHECK: [[DEFAULT_DEST]]: +; CHECK-NEXT: ret void +; +entry: + br label %switch_bb + +switch_bb: + %phi = phi i32 [ 0, %entry ], [ %determine, %sub_switch_bb ], [ 2, %case2 ] + switch i32 %phi, label %default_dest [ + i32 0, label %case1 + i32 1, label %case2 + i32 2, label %case3 + ] + +case1: + br label %sub_switch_bb + +case3: + br label %sub_switch_bb + +sub_switch_bb: + %determine = phi i32 [ 1, %case1 ], [ 3, %case3 ] + %def = load i32, ptr %p + switch i32 %cond, label %switch_bb [ + i32 0, label %outer1 
+ ] + +case2: + br label %switch_bb + +outer1: + call void @use(i32 %def) + ret void + +default_dest: + ret void +} + + +define void @max_outer_uses_multi_preds(i32 %cond, ptr %p) { +; CHECK-LABEL: define void @max_outer_uses_multi_preds( +; CHECK-SAME: i32 [[COND:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[SWITCH_BB:.*]] +; CHECK: [[SWITCH_BB]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ poison, %[[SUB_SWITCH_BB:.*]] ] +; CHECK-NEXT: switch i32 [[PHI]], label %[[DEFAULT_DEST:.*]] [ +; CHECK-NEXT: i32 0, label %[[CASE1:.*]] +; CHECK-NEXT: i32 1, label %[[CASE2:.*]] +; CHECK-NEXT: i32 2, label %[[CASE3:.*]] +; CHECK-NEXT: i32 3, label %[[CASE4:.*]] +; CHECK-NEXT: ] +; CHECK: [[SWITCH_BB_JT2:.*]]: +; CHECK-NEXT: [[PHI_JT2:%.*]] = phi i32 [ 2, %[[CASE2]] ] +; CHECK-NEXT: br label %[[CASE3]] +; CHECK: [[SWITCH_BB_JT3:.*]]: +; CHECK-NEXT: [[PHI_JT3:%.*]] = phi i32 [ [[DETERMINE_JT3:%.*]], %[[SUB_SWITCH_BB_JT3:.*]] ] +; CHECK-NEXT: br label %[[CASE4]] +; CHECK: [[SWITCH_BB_JT1:.*]]: +; CHECK-NEXT: [[PHI_JT1:%.*]] = phi i32 [ [[DETERMINE_JT1:%.*]], %[[SUB_SWITCH_BB_JT1:.*]] ] +; CHECK-NEXT: br label %[[CASE2]] +; CHECK: [[CASE1]]: +; CHECK-NEXT: br label %[[SUB_SWITCH_BB_JT1]] +; CHECK: [[CASE3]]: +; CHECK-NEXT: br label %[[SUB_SWITCH_BB_JT3]] +; CHECK: [[SUB_SWITCH_BB]]: +; CHECK-NEXT: [[DEF:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: switch i32 [[COND]], label %[[SWITCH_BB]] [ +; CHECK-NEXT: i32 0, label %[[OUTER1:.*]] +; CHECK-NEXT: i32 1, label %[[OUTER2:.*]] +; CHECK-NEXT: i32 2, label %[[OUTER3:.*]] +; CHECK-NEXT: i32 3, label %[[OUTER4:.*]] +; CHECK-NEXT: ] +; CHECK: [[SUB_SWITCH_BB_JT3]]: +; CHECK-NEXT: [[DETERMINE_JT3]] = phi i32 [ 3, %[[CASE3]] ] +; CHECK-NEXT: [[DEF_JT3:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: switch i32 [[COND]], label %[[SWITCH_BB_JT3]] [ +; CHECK-NEXT: i32 0, label %[[OUTER1]] +; CHECK-NEXT: i32 1, label %[[OUTER2]] +; CHECK-NEXT: i32 2, label %[[OUTER3]] +; CHECK-NEXT: 
i32 3, label %[[OUTER4]] +; CHECK-NEXT: ] +; CHECK: [[SUB_SWITCH_BB_JT1]]: +; CHECK-NEXT: [[DETERMINE_JT1]] = phi i32 [ 1, %[[CASE1]] ] +; CHECK-NEXT: [[DEF_JT1:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: switch i32 [[COND]], label %[[SWITCH_BB_JT1]] [ +; CHECK-NEXT: i32 0, label %[[OUTER1]] +; CHECK-NEXT: i32 1, label %[[OUTER2]] +; CHECK-NEXT: i32 2, label %[[OUTER3]] +; CHECK-NEXT: i32 3, label %[[OUTER4]] +; CHECK-NEXT: ] +; CHECK: [[CASE4]]: +; CHECK-NEXT: [[DEF1:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: switch i32 [[COND]], label %[[OUTER4]] [ +; CHECK-NEXT: i32 0, label %[[OUTER1]] +; CHECK-NEXT: i32 1, label %[[OUTER2]] +; CHECK-NEXT: i32 2, label %[[OUTER3]] +; CHECK-NEXT: ] +; CHECK: [[CASE2]]: +; CHECK-NEXT: br label %[[SWITCH_BB_JT2]] +; CHECK: [[OUTER1]]: +; CHECK-NEXT: [[PHI1:%.*]] = phi i32 [ [[DEF]], %[[SUB_SWITCH_BB]] ], [ [[DEF1]], %[[CASE4]] ], [ [[DEF_JT1]], %[[SUB_SWITCH_BB_JT1]] ], [ [[DEF_JT3]], %[[SUB_SWITCH_BB_JT3]] ] +; CHECK-NEXT: call void @use(i32 [[PHI1]]) +; CHECK-NEXT: ret void +; CHECK: [[OUTER2]]: +; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ [[DEF]], %[[SUB_SWITCH_BB]] ], [ [[DEF1]], %[[CASE4]] ], [ [[DEF_JT1]], %[[SUB_SWITCH_BB_JT1]] ], [ [[DEF_JT3]], %[[SUB_SWITCH_BB_JT3]] ] +; CHECK-NEXT: call void @use(i32 [[PHI2]]) +; CHECK-NEXT: ret void +; CHECK: [[OUTER3]]: +; CHECK-NEXT: [[PHI3:%.*]] = phi i32 [ [[DEF]], %[[SUB_SWITCH_BB]] ], [ [[DEF1]], %[[CASE4]] ], [ [[DEF_JT1]], %[[SUB_SWITCH_BB_JT1]] ], [ [[DEF_JT3]], %[[SUB_SWITCH_BB_JT3]] ] +; CHECK-NEXT: call void @use(i32 [[PHI3]]) +; CHECK-NEXT: ret void +; CHECK: [[OUTER4]]: +; CHECK-NEXT: [[PHI4:%.*]] = phi i32 [ [[DEF]], %[[SUB_SWITCH_BB]] ], [ [[DEF1]], %[[CASE4]] ], [ [[DEF_JT1]], %[[SUB_SWITCH_BB_JT1]] ], [ [[DEF_JT3]], %[[SUB_SWITCH_BB_JT3]] ] +; CHECK-NEXT: call void @use(i32 [[PHI4]]) +; CHECK-NEXT: ret void +; CHECK: [[DEFAULT_DEST]]: +; CHECK-NEXT: ret void +; +entry: + br label %switch_bb + +switch_bb: + %phi = phi i32 [ 0, %entry ], [ %determine, 
%sub_switch_bb ], [ 2, %case2 ] + switch i32 %phi, label %default_dest [ + i32 0, label %case1 + i32 1, label %case2 + i32 2, label %case3 + i32 3, label %case4 + ] + +case1: + br label %sub_switch_bb + +case3: + br label %sub_switch_bb + +sub_switch_bb: + %determine = phi i32 [ 1, %case1 ], [ 3, %case3 ] + %def = load i32, ptr %p + switch i32 %cond, label %switch_bb [ + i32 0, label %outer1 + i32 1, label %outer2 + i32 2, label %outer3 + i32 3, label %outer4 + ] + +case4: + %def1 = load i32, ptr %p + switch i32 %cond, label %outer4 [ + i32 0, label %outer1 + i32 1, label %outer2 + i32 2, label %outer3 + ] + +case2: + br label %switch_bb + +outer1: + %phi1 = phi i32 [ %def, %sub_switch_bb ], [ %def1, %case4 ] + call void @use(i32 %phi1) + ret void + +outer2: + %phi2 = phi i32 [ %def, %sub_switch_bb ], [ %def1, %case4 ] + call void @use(i32 %phi2) + ret void + +outer3: + %phi3 = phi i32 [ %def, %sub_switch_bb ], [ %def1, %case4 ] + call void @use(i32 %phi3) + ret void + +outer4: + %phi4 = phi i32 [ %def, %sub_switch_bb ], [ %def1, %case4 ] + call void @use(i32 %phi4) + ret void + +default_dest: + ret void +}