Skip to content

Commit

Permalink
Revert "Inlining: Run the legacy AlwaysInliner before the regular inl…
Browse files Browse the repository at this point in the history
…iner."

This seems to cause large regressions in existing code, as much as 75% slower
(4x the time taken). Small always inline functions seem to be used a lot in the
cmsis-dsp library.

I would add a phase ordering test to show the problems, but one already exists!
The llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll was just changed by
removing alwaysinline to hide the problems that existed.

This reverts commit cae033d.
This reverts commit 8e33c41.
  • Loading branch information
davemgreen committed Feb 10, 2023
1 parent c4a250e commit 86bfeb9
Show file tree
Hide file tree
Showing 14 changed files with 397 additions and 560 deletions.
1 change: 1 addition & 0 deletions clang/test/CodeGen/code-coverage.c
Expand Up @@ -14,6 +14,7 @@
// RUN: %clang_cc1 -emit-llvm-bc -o /dev/null -fdebug-pass-manager -fprofile-arcs %s 2>&1 | FileCheck --check-prefix=NEWPM %s
// RUN: %clang_cc1 -emit-llvm-bc -o /dev/null -fdebug-pass-manager -fprofile-arcs -O3 %s 2>&1 | FileCheck --check-prefix=NEWPM-O3 %s

// NEWPM-NOT: Running pass
// NEWPM: Running pass: GCOVProfilerPass

// NEWPM-O3-NOT: Running pass
Expand Down
Expand Up @@ -73,7 +73,7 @@ void bar(int x) {
// THRESHOLD-NOT: hotness
// NO_PGO: '-fdiagnostics-show-hotness' requires profile-guided optimization information
// NO_PGO: '-fdiagnostics-hotness-threshold=' requires profile-guided optimization information
// expected-remark@+1 {{'foo' inlined into 'bar' with (cost=always): always inline attribute at callsite bar:8:10; (hotness:}}
// expected-remark@+1 {{'foo' inlined into 'bar': always inline attribute at callsite bar:8:10; (hotness:}}
sum += foo(x, x - 2);
}

Expand Down
751 changes: 376 additions & 375 deletions clang/test/Headers/__clang_hip_math.hip

Large diffs are not rendered by default.

15 changes: 8 additions & 7 deletions clang/test/OpenMP/bug57757.cpp
Expand Up @@ -32,23 +32,24 @@ void foo() {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP1]], i64 0, i32 2
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]])
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[TBAA16:![0-9]+]], !alias.scope !13, !noalias !17
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]])
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[TBAA18:![0-9]+]], !alias.scope !13, !noalias !16
// CHECK-NEXT: switch i32 [[TMP3]], label [[DOTOMP_OUTLINED__EXIT:%.*]] [
// CHECK-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]]
// CHECK-NEXT: i32 1, label [[DOTUNTIED_NEXT__I:%.*]]
// CHECK-NEXT: ]
// CHECK: .untied.jmp..i:
// CHECK-NEXT: store i32 1, ptr [[TMP2]], align 4, !tbaa [[TBAA16]], !alias.scope !13, !noalias !17
// CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]), !noalias !13
// CHECK-NEXT: store i32 1, ptr [[TMP2]], align 4, !tbaa [[TBAA18]], !alias.scope !13, !noalias !16
// CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]), !noalias !19
// CHECK-NEXT: br label [[DOTOMP_OUTLINED__EXIT]]
// CHECK: .untied.next..i:
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP1]], i64 0, i32 1
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP1]], i64 0, i32 1, i32 2
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP1]], i64 0, i32 1, i32 1
// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA19:![0-9]+]], !noalias !13
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA16]], !noalias !13
// CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[TBAA20:![0-9]+]], !noalias !13
// CHECK-NEXT: tail call void [[TMP8]](i32 noundef [[TMP9]], float noundef [[TMP10]]) #[[ATTR2:[0-9]+]], !noalias !13
// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA20:![0-9]+]], !alias.scope !16, !noalias !13
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA18]], !alias.scope !16, !noalias !13
// CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[TBAA21:![0-9]+]], !alias.scope !16, !noalias !13
// CHECK-NEXT: tail call void [[TMP8]](i32 noundef [[TMP9]], float noundef [[TMP10]]) #[[ATTR2:[0-9]+]], !noalias !19
// CHECK-NEXT: br label [[DOTOMP_OUTLINED__EXIT]]
// CHECK: .omp_outlined..exit:
// CHECK-NEXT: ret i32 0
Expand Down
4 changes: 1 addition & 3 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Expand Up @@ -161,7 +161,7 @@ static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
cl::desc("Enable module inliner"));

static cl::opt<bool> PerformMandatoryInliningsFirst(
"mandatory-inlining-first", cl::init(false), cl::Hidden,
"mandatory-inlining-first", cl::init(true), cl::Hidden,
cl::desc("Perform mandatory inlinings module-wide, before performing "
"inlining"));

Expand Down Expand Up @@ -1079,8 +1079,6 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
if (EnableSyntheticCounts && !PGOOpt)
MPM.addPass(SyntheticCountsPropagation());

MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));

if (EnableModuleInliner)
MPM.addPass(buildModuleInlinerPipeline(Level, Phase));
else
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Other/new-pm-defaults.ll
Expand Up @@ -122,8 +122,6 @@
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
Expand All @@ -132,12 +130,14 @@
; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}AAManager
; CHECK-O-NEXT: Invalidating analysis: AAManager
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass
; CHECK-O-NEXT: Running pass: InlinerPass
; CHECK-O-NEXT: Running pass: InlinerPass
; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Other/new-pm-print-pipeline.ll
Expand Up @@ -62,7 +62,7 @@
; CHECK-20: cgscc(inline<only-mandatory>,inline),cgscc(inline)

; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='scc-oz-module-inliner' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-21
; CHECK-21: require<globals-aa>,function(invalidate<aa>),require<profile-summary>,cgscc(devirt<4>(inline,{{.*}},instcombine{{.*}}))
; CHECK-21: require<globals-aa>,function(invalidate<aa>),require<profile-summary>,cgscc(devirt<4>(inline<only-mandatory>,inline,{{.*}},instcombine{{.*}}))

; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='cgscc(function<eager-inv>(no-op-function)),function<eager-inv>(no-op-function)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-22
; CHECK-22: cgscc(function<eager-inv>(no-op-function)),function<eager-inv>(no-op-function)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Other/new-pm-thinlto-defaults.ll
Expand Up @@ -99,8 +99,6 @@
; CHECK-O-NEXT: Running analysis: TypeBasedAA
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-PRELINK-O-NEXT: Running analysis: ProfileSummaryAnalysis
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
Expand All @@ -109,12 +107,14 @@
; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}AAManager
; CHECK-O-NEXT: Invalidating analysis: AAManager
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
; CHECK-PRELINK-O-NEXT: Running analysis: ProfileSummaryAnalysis
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass
; CHECK-O-NEXT: Running pass: InlinerPass
; CHECK-O-NEXT: Running pass: InlinerPass
; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
Expand Up @@ -63,7 +63,6 @@
; CHECK-O123-NEXT: Running analysis: BranchProbabilityAnalysis on foo
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
Expand All @@ -78,6 +77,7 @@
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass
; CHECK-O-NEXT: Running pass: InlinerPass
; CHECK-O-NEXT: Running pass: InlinerPass
; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
Expand Down
Expand Up @@ -73,7 +73,6 @@
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo

; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
Expand All @@ -87,6 +86,7 @@
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass
; CHECK-O-NEXT: Running pass: InlinerPass
; CHECK-O-NEXT: Running pass: InlinerPass
; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
Expand Up @@ -87,7 +87,6 @@
; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion on
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis on foo
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
Expand All @@ -102,6 +101,7 @@
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass
; CHECK-O-NEXT: Running pass: InlinerPass
; CHECK-O-NEXT: Running pass: InlinerPass
; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O-NEXT: Running analysis: BasicAA
Expand Down
Expand Up @@ -68,7 +68,6 @@
; CHECK-O123-NEXT: Running analysis: BranchProbabilityAnalysis on foo
; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
Expand All @@ -82,6 +81,7 @@
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}>
; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass
; CHECK-O-NEXT: Running pass: InlinerPass
; CHECK-O-NEXT: Running pass: InlinerPass
; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
Expand Down
164 changes: 0 additions & 164 deletions llvm/test/Transforms/Inline/always-inline-phase-ordering.ll

This file was deleted.

2 changes: 1 addition & 1 deletion llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
Expand Up @@ -218,5 +218,5 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1

attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" "unsafe-fp-math"="true" }
attributes #1 = { argmemonly nofree nosync nounwind willreturn }
attributes #2 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" "unsafe-fp-math"="true" }
attributes #2 = { alwaysinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" "unsafe-fp-math"="true" }
attributes #3 = { nounwind }

0 comments on commit 86bfeb9

Please sign in to comment.