diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h index 349d5a7a08795..e9ddff01f728c 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -188,6 +188,8 @@ class FunctionSpecializer { bool run(); + static unsigned getBlockFreqMultiplier(); + InstCostVisitor getInstCostVisitorFor(Function *F) { auto &BFI = (GetBFI)(*F); auto &TTI = (GetTTI)(*F); diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index a970253d9b1c8..a635d7b4d40aa 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -74,6 +74,22 @@ static cl::opt<bool> ForceSpecialization( "Force function specialization for every call site with a constant " "argument")); +// Set to 2^3 to model three levels of if-else nest. +static cl::opt<unsigned> BlockFreqMultiplier( + "funcspec-block-freq-multiplier", cl::init(8), cl::Hidden, cl::desc( + "Multiplier to scale block frequency of user instructions during " + "specialization bonus estimation")); + +static cl::opt<unsigned> MinEntryFreq( + "funcspec-min-entry-freq", cl::init(450), cl::Hidden, cl::desc( + "Do not specialize functions with entry block frequency lower than " + "this value")); + +static cl::opt<unsigned> MinScore( + "funcspec-min-score", cl::init(2), cl::Hidden, cl::desc( + "Do not specialize functions with score lower than this value " + "(the ratio of specialization bonus over specialization cost)")); + static cl::opt<unsigned> MaxClones( "funcspec-max-clones", cl::init(3), cl::Hidden, cl::desc( "The maximum number of clones allowed for a single function " @@ -88,15 +104,15 @@ static cl::opt<bool> SpecializeOnAddress( "funcspec-on-address", cl::init(false), cl::Hidden, cl::desc( "Enable function specialization on the address of global values")); -// Disabled by default as it can significantly increase 
compilation times. -// -// https://llvm-compile-time-tracker.com -// https://github.com/nikic/llvm-compile-time-tracker static cl::opt<bool> SpecializeLiteralConstant( - "funcspec-for-literal-constant", cl::init(false), cl::Hidden, cl::desc( + "funcspec-for-literal-constant", cl::init(true), cl::Hidden, cl::desc( "Enable specialization of functions that take a literal constant as an " "argument")); +unsigned FunctionSpecializer::getBlockFreqMultiplier() { + return BlockFreqMultiplier; +} + // Estimates the instruction cost of all the basic blocks in \p WorkList. // The successors of such blocks are added to the list as long as they are // executable and they have a unique predecessor. \p WorkList represents @@ -114,7 +130,8 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList, while (!WorkList.empty()) { BasicBlock *BB = WorkList.pop_back_val(); - uint64_t Weight = BFI.getBlockFreq(BB).getFrequency() / + uint64_t Weight = BlockFreqMultiplier * + BFI.getBlockFreq(BB).getFrequency() / BFI.getEntryFreq(); if (!Weight) continue; @@ -167,7 +184,8 @@ Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) { KnownConstants.insert({User, C}); - uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() / + uint64_t Weight = BlockFreqMultiplier * + BFI.getBlockFreq(User->getParent()).getFrequency() / BFI.getEntryFreq(); if (!Weight) return 0; @@ -649,6 +667,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost, if (Args.empty()) return false; + bool HasCheckedEntryFreq = false; for (User *U : F->users()) { if (!isa<CallInst>(U) && !isa<InvokeInst>(U)) continue; @@ -684,6 +703,21 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost, if (S.Args.empty()) continue; + // Check the function entry frequency only once. We sink this code here to + // postpone running the Block Frequency Analysis until we know for sure + // there are Specialization candidates, otherwise we are adding unnecessary + // overhead. 
+ if (!HasCheckedEntryFreq) { + // Reject cold functions (for some definition of 'cold'). + uint64_t EntryFreq = (GetBFI)(*F).getEntryFreq(); + if (!ForceSpecialization && EntryFreq < MinEntryFreq) + return false; + + HasCheckedEntryFreq = true; + LLVM_DEBUG(dbgs() << "FnSpecialization: Entry block frequency for " + << F->getName() << " = " << EntryFreq << "\n"); + } + // Check if we have encountered the same specialisation already. if (auto It = UniqueSpecs.find(S); It != UniqueSpecs.end()) { // Existing specialisation. Add the call to the list to rewrite, unless @@ -698,13 +732,14 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost, AllSpecs[Index].CallSites.push_back(&CS); } else { // Calculate the specialisation gain. - Cost Score = 0 - SpecCost; + Cost Score = 0; InstCostVisitor Visitor = getInstCostVisitorFor(F); for (ArgInfo &A : S.Args) Score += getSpecializationBonus(A.Formal, A.Actual, Visitor); + Score /= SpecCost; // Discard unprofitable specialisations. - if (!ForceSpecialization && Score <= 0) + if (!ForceSpecialization && Score < MinScore) continue; // Create a new specialisation entry. 
diff --git a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll index 5cbfaade98d3c..7c390dadef777 100644 --- a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll +++ b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes="default<O3>" < %s | FileCheck %s +; RUN: opt -S --passes="default<O3>" -force-specialization < %s | FileCheck %s define dso_local i32 @g0(i32 noundef %x) local_unnamed_addr { entry: diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll index 003f80fa260ff..ef73ed63b863b 100644 --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll @@ -1,11 +1,9 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes="ipsccp" -force-specialization -S < %s | FileCheck %s ; Test function specialization wouldn't crash due to constant expression. ; Note that this test case shows that function specialization pass would ; transform the function even if no specialization happened. 
-; RUN: opt -passes="ipsccp" -force-specialization -S < %s | FileCheck %s - %struct = type { i8, i16, i32, i64, i64} @Global = internal constant %struct {i8 0, i16 1, i32 2, i64 3, i64 4} @@ -26,19 +24,6 @@ entry: } define internal i64 @zoo(i1 %flag) { -; CHECK-LABEL: @zoo( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]] -; CHECK: plus: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @func2.2(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3)) -; CHECK-NEXT: br label [[MERGE:%.*]] -; CHECK: minus: -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @func2.1(ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i32 0, i32 4)) -; CHECK-NEXT: br label [[MERGE]] -; CHECK: merge: -; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3) to i64), [[PLUS]] ], [ ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4) to i64), [[MINUS]] ] -; CHECK-NEXT: ret i64 [[TMP2]] -; entry: br i1 %flag, label %plus, label %minus @@ -60,10 +45,9 @@ merge: define i64 @main() { ; CHECK-LABEL: @main( -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @zoo(i1 false) -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @zoo(i1 true) -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP1]], [[TMP2]] -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @zoo.4(i1 false) +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @zoo.3(i1 true) +; CHECK-NEXT: ret i64 add (i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4) to i64), i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i32 0, i32 3) to i64)) ; %1 = call i64 @zoo(i1 0) %2 = call i64 @zoo(i1 1) @@ -71,3 +55,29 @@ define i64 @main() { ret i64 %3 } +; CHECK-LABEL: @func2.1( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i64 undef + +; CHECK-LABEL: @func2.2( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i64 undef + +; CHECK-LABEL: @zoo.3( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[PLUS:%.*]] +; 
CHECK: plus: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @func2.2(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3)) +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: merge: +; CHECK-NEXT: ret i64 undef + +; CHECK-LABEL: @zoo.4( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[MINUS:%.*]] +; CHECK: minus: +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @func2.1(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4)) +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: merge: +; CHECK-NEXT: ret i64 undef + diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll index 525721f03cfb2..609058764262b 100644 --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-minsize3.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes="ipsccp" -funcspec-min-function-size=3 -S < %s | FileCheck %s +; RUN: opt -passes="ipsccp" -force-specialization -S < %s | FileCheck %s ; Checks for callsites that have been annotated with MinSize. 
We only expect ; specialisation for the call that does not have the attribute: diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll index b5d16f6dab1c0..21be617fd5c3b 100644 --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization.ll @@ -1,5 +1,5 @@ -; RUN: opt -passes="ipsccp" -funcspec-min-function-size=3 -S < %s | FileCheck %s -; RUN: opt -passes="ipsccp" -funcspec-min-function-size=3 -S < %s | FileCheck %s --check-prefix=NOFSPEC +; RUN: opt -passes="ipsccp" -force-specialization -S < %s | FileCheck %s +; RUN: opt -passes="ipsccp" -force-specialization -S < %s | FileCheck %s --check-prefix=NOFSPEC define i64 @main(i64 %x, i1 %flag) { ; diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll deleted file mode 100644 index 950ed13f7b9e1..0000000000000 --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll +++ /dev/null @@ -1,88 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes="ipsccp,deadargelim" -force-specialization -S < %s | FileCheck %s -; RUN: opt -passes="ipsccp,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s -; RUN: opt -passes="ipsccp,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED - -; DISABLED-NOT: @func.1( -; DISABLED-NOT: @func.2( - -define internal i32 @func(ptr %0, i32 %1, ptr nocapture %2) { - %4 = alloca i32, align 4 - store i32 %1, ptr %4, align 4 - %5 = load i32, ptr %4, align 4 - %6 = icmp slt i32 %5, 1 - br i1 %6, label %14, label %7 - -7: ; preds = %3 - %8 = load i32, ptr %4, align 4 - %9 = sext i32 %8 to i64 - %10 = getelementptr inbounds i32, ptr %0, i64 %9 - call void %2(ptr %10) - 
%11 = load i32, ptr %4, align 4 - %12 = add nsw i32 %11, -1 - %13 = call i32 @func(ptr %0, i32 %12, ptr %2) - br label %14 - -14: ; preds = %3, %7 - ret i32 0 -} - -define internal void @increment(ptr nocapture %0) { - %2 = load i32, ptr %0, align 4 - %3 = add nsw i32 %2, 1 - store i32 %3, ptr %0, align 4 - ret void -} - -define internal void @decrement(ptr nocapture %0) { - %2 = load i32, ptr %0, align 4 - %3 = add nsw i32 %2, -1 - store i32 %3, ptr %0, align 4 - ret void -} - -define i32 @main(ptr %0, i32 %1) { -; CHECK: call void @func.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) - %3 = call i32 @func(ptr %0, i32 %1, ptr nonnull @increment) -; CHECK: call void @func.1(ptr [[TMP0]], i32 0) - %4 = call i32 @func(ptr %0, i32 %3, ptr nonnull @decrement) -; CHECK: ret i32 0 - ret i32 %4 -} - -; CHECK: @func.1( -; CHECK: [[TMP3:%.*]] = alloca i32, align 4 -; CHECK: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4 -; CHECK: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -; CHECK: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1 -; CHECK: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]] -; CHECK: 6: -; CHECK: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4 -; CHECK: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 -; CHECK: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]] -; CHECK: call void @decrement(ptr [[TMP9]]) -; CHECK: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4 -; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1 -; CHECK: call void @func.1(ptr [[TMP0]], i32 [[TMP11]]) -; CHECK: br label [[TMP12:%.*]] -; CHECK: 12: -; CHECK: ret void -; -; -; CHECK: @func.2( -; CHECK: [[TMP3:%.*]] = alloca i32, align 4 -; CHECK: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4 -; CHECK: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -; CHECK: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1 -; CHECK: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]] -; CHECK: 6: -; CHECK: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4 -; CHECK: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 -; CHECK: 
[[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]] -; CHECK: call void @increment(ptr [[TMP9]]) -; CHECK: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4 -; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1 -; CHECK: call void @func.2(ptr [[TMP0]], i32 [[TMP11]]) -; CHECK: br label [[TMP12:%.*]] -; CHECK: 12: -; CHECK: ret void diff --git a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll index 9b14db5399f3d..84231b1cae6e4 100644 --- a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll +++ b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes="ipsccp" < %s | FileCheck %s +; RUN: opt -S --passes="ipsccp" -force-specialization < %s | FileCheck %s define dso_local i32 @p0(i32 noundef %x) { entry: %add = add nsw i32 %x, 1 diff --git a/llvm/test/Transforms/FunctionSpecialization/global-rank.ll b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll index 541faa2e19515..d46b73d156894 100644 --- a/llvm/test/Transforms/FunctionSpecialization/global-rank.ll +++ b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll @@ -1,4 +1,5 @@ -; RUN: opt -S --passes="ipsccp" -funcspec-max-clones=1 < %s | FileCheck %s +; RUN: opt -S --passes="ipsccp" -funcspec-max-clones=1 -force-specialization < %s | FileCheck %s + define internal i32 @f(i32 noundef %x, ptr nocapture noundef readonly %p, ptr nocapture noundef readonly %q) noinline { entry: %call = tail call i32 %p(i32 noundef %x) diff --git a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll index c2ba0920c2be3..2cfbf9dd7bdaa 100644 --- a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll +++ b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll @@ -6,10 +6,10 @@ define i64 @main(i64 %x, i64 %y, i1 
%flag) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]] ; CHECK: plus: -; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @plus, ptr @minus) +; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 42, ptr @plus, ptr @minus) ; CHECK-NEXT: br label [[MERGE:%.*]] ; CHECK: minus: -; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus) +; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y:%.*]], ptr @minus, ptr @plus) ; CHECK-NEXT: br label [[MERGE]] ; CHECK: merge: ; CHECK-NEXT: [[PH:%.*]] = phi i64 [ [[CMP0]], [[PLUS]] ], [ [[CMP1]], [[MINUS]] ] @@ -20,7 +20,7 @@ entry: br i1 %flag, label %plus, label %minus plus: - %cmp0 = call i64 @compute(i64 %x, i64 %y, ptr @plus, ptr @minus) + %cmp0 = call i64 @compute(i64 %x, i64 42, ptr @plus, ptr @minus) br label %merge minus: @@ -68,9 +68,9 @@ entry: ; CHECK-LABEL: @compute.2 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 [[Y:%.*]]) -; CHECK-NEXT: [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]]) -; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus) +; CHECK-NEXT: [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 42) +; CHECK-NEXT: [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 42) +; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 42, ptr @plus, ptr @plus) ; CHECK-LABEL: @compute.3 ; CHECK-NEXT: entry: diff --git a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll index fc400202ab91e..479a841567ad7 100644 --- a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll +++ b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll @@ -1,7 +1,8 @@ ; RUN: opt -S --passes="ipsccp" \ +; RUN: -funcspec-for-literal-constant=0 \ ; RUN: -force-specialization < %s | FileCheck %s -check-prefix CHECK-NOLIT ; RUN: opt -S 
--passes="ipsccp" \ -; RUN: -funcspec-for-literal-constant \ +; RUN: -funcspec-for-literal-constant=1 \ ; RUN: -force-specialization < %s | FileCheck %s -check-prefix CHECK-LIT define i32 @f0(i32 noundef %x) { diff --git a/llvm/test/Transforms/FunctionSpecialization/max-iters.ll b/llvm/test/Transforms/FunctionSpecialization/max-iters.ll new file mode 100644 index 0000000000000..76d60949f1ade --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/max-iters.ll @@ -0,0 +1,110 @@ +; RUN: opt -passes="ipsccp,deadargelim" -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS1 +; RUN: opt -passes="ipsccp,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS1 +; RUN: opt -passes="ipsccp,deadargelim" -funcspec-max-iters=2 -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS2 +; RUN: opt -passes="ipsccp,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED + +; DISABLED-NOT: @func.1( +; DISABLED-NOT: @func.2( +; DISABLED-NOT: @func.3( + +define internal i32 @func(ptr %0, i32 %1, ptr nocapture %2) { + %4 = alloca i32, align 4 + store i32 %1, ptr %4, align 4 + %5 = load i32, ptr %4, align 4 + %6 = icmp slt i32 %5, 1 + br i1 %6, label %14, label %7 + +7: ; preds = %3 + %8 = load i32, ptr %4, align 4 + %9 = sext i32 %8 to i64 + %10 = getelementptr inbounds i32, ptr %0, i64 %9 + call void %2(ptr %10) + %11 = load i32, ptr %4, align 4 + %12 = add nsw i32 %11, -1 + %13 = call i32 @func(ptr %0, i32 %12, ptr %2) + br label %14 + +14: ; preds = %3, %7 + ret i32 0 +} + +define internal void @increment(ptr nocapture %0) { + %2 = load i32, ptr %0, align 4 + %3 = add nsw i32 %2, 1 + store i32 %3, ptr %0, align 4 + ret void +} + +define internal void @decrement(ptr nocapture %0) { + %2 = load i32, ptr %0, align 4 + %3 = add nsw i32 %2, -1 + store i32 %3, ptr %0, align 4 + ret void +} + +define i32 @main(ptr %0, i32 %1) { +; COMMON: 
define i32 @main( +; COMMON-NEXT: call void @func.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) +; COMMON-NEXT: call void @func.1(ptr [[TMP0]]) +; COMMON-NEXT: ret i32 0 +; + %3 = call i32 @func(ptr %0, i32 %1, ptr nonnull @increment) + %4 = call i32 @func(ptr %0, i32 %3, ptr nonnull @decrement) + ret i32 %4 +} + +; COMMON: define internal void @func.1( +; COMMON-NEXT: [[TMP2:%.*]] = alloca i32, align 4 +; COMMON-NEXT: store i32 0, ptr [[TMP2]], align 4 +; COMMON-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +; COMMON-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +; COMMON-NEXT: br i1 [[TMP4]], label [[TMP11:%.*]], label [[TMP5:%.*]] +; COMMON: 5: +; COMMON-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +; COMMON-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +; COMMON-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP7]] +; COMMON-NEXT: call void @decrement(ptr [[TMP8]]) +; COMMON-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +; COMMON-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1 +; ITERS1-NEXT: call void @func(ptr [[TMP0]], i32 [[TMP10]], ptr @decrement) +; ITERS2-NEXT: call void @func.3(ptr [[TMP0]], i32 [[TMP10]]) +; COMMON-NEXT: br label [[TMP11:%.*]] +; COMMON: 11: +; COMMON-NEXT: ret void +; +; COMMON: define internal void @func.2( +; COMMON-NEXT: [[TMP3:%.*]] = alloca i32, align 4 +; COMMON-NEXT: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4 +; COMMON-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +; COMMON-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1 +; COMMON-NEXT: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]] +; COMMON: 6: +; COMMON-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4 +; COMMON-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 +; COMMON-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]] +; COMMON-NEXT: call void @increment(ptr [[TMP9]]) +; COMMON-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4 +; COMMON-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1 
+; COMMON-NEXT: call void @func.2(ptr [[TMP0]], i32 [[TMP11]]) +; COMMON-NEXT: br label [[TMP12:%.*]] +; COMMON: 12: +; COMMON-NEXT: ret void +; +; ITERS2: define internal void @func.3( +; ITERS2-NEXT: [[TMP3:%.*]] = alloca i32, align 4 +; ITERS2-NEXT: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4 +; ITERS2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +; ITERS2-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1 +; ITERS2-NEXT: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]] +; ITERS2: 6: +; ITERS2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4 +; ITERS2-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 +; ITERS2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]] +; ITERS2-NEXT: call void @decrement(ptr [[TMP9]]) +; ITERS2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4 +; ITERS2-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1 +; ITERS2-NEXT: call void @func.3(ptr [[TMP0]], i32 [[TMP11]]) +; ITERS2-NEXT: br label [[TMP12:%.*]] +; ITERS2: 12: +; ITERS2-NEXT: ret void + diff --git a/llvm/test/Transforms/FunctionSpecialization/noinline.ll b/llvm/test/Transforms/FunctionSpecialization/noinline.ll index 863e6e74eb23c..bf66cf374c488 100644 --- a/llvm/test/Transforms/FunctionSpecialization/noinline.ll +++ b/llvm/test/Transforms/FunctionSpecialization/noinline.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes="ipsccp" < %s | FileCheck %s +; RUN: opt -S --passes="ipsccp" -funcspec-min-entry-freq=1 < %s | FileCheck %s define dso_local i32 @p0(i32 noundef %x) { entry: %add = add nsw i32 %x, 1 diff --git a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll index 4233998ad9f6d..3db1a8ce69a10 100644 --- a/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll +++ b/llvm/test/Transforms/FunctionSpecialization/remove-dead-recursive-function.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes="ipsccp" 
-funcspec-min-function-size=3 -S < %s | FileCheck %s +; RUN: opt -passes="ipsccp" -force-specialization -S < %s | FileCheck %s define i64 @main(i64 %x, i1 %flag) { entry: diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll index d1c23e07d5972..73006ae0fcb58 100644 --- a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll +++ b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll @@ -1,20 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes="ipsccp" -funcspec-max-clones=0 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=NONE -; RUN: opt -passes="ipsccp" -funcspec-max-clones=1 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=ONE -; RUN: opt -passes="ipsccp" -funcspec-max-clones=2 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=TWO -; RUN: opt -passes="ipsccp" -funcspec-max-clones=3 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=THREE +; RUN: opt -passes="ipsccp" -funcspec-max-clones=0 -force-specialization -S < %s | FileCheck %s --check-prefix=NONE +; RUN: opt -passes="ipsccp" -funcspec-max-clones=1 -force-specialization -S < %s | FileCheck %s --check-prefix=ONE +; RUN: opt -passes="ipsccp" -funcspec-max-clones=2 -force-specialization -S < %s | FileCheck %s --check-prefix=TWO +; RUN: opt -passes="ipsccp" -funcspec-max-clones=3 -force-specialization -S < %s | FileCheck %s --check-prefix=THREE ; Make sure that we iterate correctly after sorting the specializations: -; FnSpecialization: Specializations for function compute -; FnSpecialization: Gain = 608 -; FnSpecialization: FormalArg = binop1, ActualArg = power -; FnSpecialization: FormalArg = binop2, ActualArg = mul -; FnSpecialization: Gain = 982 -; FnSpecialization: FormalArg = binop1, ActualArg = plus -; FnSpecialization: 
FormalArg = binop2, ActualArg = minus -; FnSpecialization: Gain = 795 -; FnSpecialization: FormalArg = binop1, ActualArg = minus -; FnSpecialization: FormalArg = binop2, ActualArg = power +; +; Score(@plus, @minus) > Score(42, @minus, @power) > Score(@power, @mul) define i64 @main(i64 %x, i64 %y, i1 %flag) { ; NONE-LABEL: @main( @@ -116,11 +108,11 @@ merge: ; ; THREE-LABEL: define internal i64 @compute.3(i64 %x, i64 %y, ptr %binop1, ptr %binop2) { ; THREE-NEXT: entry: -; THREE-NEXT: [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 %y) -; THREE-NEXT: [[TMP1:%.+]] = call i64 @power(i64 %x, i64 %y) +; THREE-NEXT: [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 42) +; THREE-NEXT: [[TMP1:%.+]] = call i64 @power(i64 %x, i64 42) ; THREE-NEXT: [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]] ; THREE-NEXT: [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x -; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], %y +; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], 42 ; THREE-NEXT: [[TMP5:%.+]] = mul i64 [[TMP4]], 2 ; THREE-NEXT: ret i64 [[TMP5]] ; THREE-NEXT: } diff --git a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp index 16c9a505e4498..c6516bbe58051 100644 --- a/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp +++ b/llvm/unittests/Transforms/IPO/FunctionSpecializationTest.cpp @@ -85,7 +85,10 @@ class FunctionSpecializationTest : public testing::Test { auto &TTI = FAM.getResult<TargetIRAnalysis>(*I.getFunction()); auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*I.getFunction()); - return BFI.getBlockFreq(I.getParent()).getFrequency() / BFI.getEntryFreq() * + uint64_t Weight = FunctionSpecializer::getBlockFreqMultiplier() * + BFI.getBlockFreq(I.getParent()).getFrequency() / + BFI.getEntryFreq(); + return Weight * TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); } };