Skip to content

Commit

Permalink
[mlir][Inliner] Use llvm::parallelForEach instead of llvm::parallelTransformReduce
Browse files Browse the repository at this point in the history

llvm::parallelTransformReduce does not schedule work on the caller thread, which becomes very costly for
the inliner where a majority of SCCs are small, often ~1 element. The switch to llvm::parallelForEach solves this,
and also aligns the implementation with the PassManager (which realistically should share the same implementation).

This change reduced compile time on an internal benchmark by ~1 second (25%).

Differential Revision: https://reviews.llvm.org/D96086
  • Loading branch information
River707 committed Feb 23, 2021
1 parent 65a3197 commit abd3c6f
Showing 1 changed file with 17 additions and 12 deletions.
29 changes: 17 additions & 12 deletions mlir/lib/Transforms/Inliner.cpp
Expand Up @@ -688,8 +688,10 @@ InlinerPass::optimizeSCCAsync(MutableArrayRef<CallGraphNode *> nodesToVisit,
MLIRContext *context) {
// Ensure that there are enough pipeline maps for the optimizer to run in
// parallel.
size_t numThreads = llvm::hardware_concurrency().compute_thread_count();
if (opPipelines.size() != numThreads) {
size_t numThreads =
std::min((size_t)llvm::hardware_concurrency().compute_thread_count(),
nodesToVisit.size());
if (opPipelines.size() < numThreads) {
// Reserve before resizing so that we can use a reference to the first
// element.
opPipelines.reserve(numThreads);
Expand All @@ -706,14 +708,11 @@ InlinerPass::optimizeSCCAsync(MutableArrayRef<CallGraphNode *> nodesToVisit,

// Optimize the nodes of the SCC in parallel.
ParallelDiagnosticHandler optimizerHandler(context);
return llvm::parallelTransformReduce(
llvm::seq<size_t>(0, numThreads), success(),
[](LogicalResult lhs, LogicalResult rhs) {
return success(succeeded(lhs) && succeeded(rhs));
},
[&](size_t index) {
LogicalResult result = success();
for (auto e = nodesToVisit.size(); nodeIt < e && succeeded(result);) {
std::atomic<bool> passFailed(false);
llvm::parallelForEach(
opPipelines.begin(), std::next(opPipelines.begin(), numThreads),
[&](llvm::StringMap<OpPassManager> &pipelines) {
for (auto e = nodesToVisit.size(); !passFailed && nodeIt < e;) {
// Get the next available operation index.
unsigned nextID = nodeIt++;
if (nextID >= e)
Expand All @@ -722,11 +721,17 @@ InlinerPass::optimizeSCCAsync(MutableArrayRef<CallGraphNode *> nodesToVisit,
// Set the order for this thread so that diagnostics will be
// properly ordered, and reset after optimization has finished.
optimizerHandler.setOrderIDForThread(nextID);
result = optimizeCallable(nodesToVisit[nextID], opPipelines[index]);
LogicalResult pipelineResult =
optimizeCallable(nodesToVisit[nextID], pipelines);
optimizerHandler.eraseOrderIDForThread();

if (failed(pipelineResult)) {
passFailed = true;
break;
}
}
return result;
});
return failure(passFailed);
}

LogicalResult
Expand Down

0 comments on commit abd3c6f

Please sign in to comment.