Skip to content

Commit

Permalink
Reapply "[OpenMP][IRBuilder] Perform finalization (incl. outlining) l…
Browse files Browse the repository at this point in the history
…ate"

Reapply 8a56d64 with minor fixes.

The problem was that cancellation can create new edges to the parallel
region exit block, which is not outlined. The CodeExtractor then encodes
which "exit" was taken as a return value. The fix is to ensure that the
outlined function does not return any value: to prevent this
control-to-value conversion, we ensure a single exit block for the
outlined region.

This reverts commit 3aac953.
  • Loading branch information
jdoerfert committed Feb 13, 2020
1 parent a6f38b4 commit 70cac41
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 115 deletions.
9 changes: 9 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Expand Up @@ -32,6 +32,7 @@
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/FPEnv.h"
Expand Down Expand Up @@ -104,6 +105,14 @@ CodeGenFunction::~CodeGenFunction() {

if (getLangOpts().OpenMP && CurFn)
CGM.getOpenMPRuntime().functionFinished(*this);

// If we have an OpenMPIRBuilder we want to finalize functions (incl.
// outlining etc) at some point. Doing it once the function codegen is done
// seems to be a reasonable spot. We do it here, as opposed to the deletion
// time of the CodeGenModule, because we have to ensure the IR has not yet
// been "emitted" to the outside, thus, modifications are still sensible.
if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder())
OMPBuilder->finalize();
}

// Map the LangOption for rounding mode into
Expand Down
17 changes: 17 additions & 0 deletions llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
Expand Up @@ -34,6 +34,9 @@ class OpenMPIRBuilder {
/// before any other method and only once!
void initialize();

/// Finalize the underlying module, e.g., by outlining regions.
void finalize();

/// Add attributes known for \p FnID to \p Fn.
void addAttributes(omp::RuntimeFunction FnID, Function &Fn);

Expand Down Expand Up @@ -254,6 +257,20 @@ class OpenMPIRBuilder {

/// Map to remember existing ident_t*.
DenseMap<std::pair<Constant *, uint64_t>, GlobalVariable *> IdentMap;

/// Description of one region that still has to be outlined when the builder
/// is finalized.
struct OutlineInfo {
  /// Signature of the hook that receives the freshly outlined function.
  using PostOutlineCBTy = std::function<void(Function &)>;

  /// Blocks that make up the region. finalize() uses the first entry as the
  /// entry block of the region.
  SmallVector<BasicBlock *, 32> Blocks;

  /// Optional hook run on the outlined function, e.g. to add attributes. It
  /// is skipped when empty.
  PostOutlineCBTy PostOutlineCB;
};

/// Collection of regions that need to be outlined during finalization.
SmallVector<OutlineInfo, 16> OutlineInfos;

/// Add a new region that will be outlined later.
///
/// \param OI Region description; its contents are moved into the list of
///           pending regions, so the caller's object is left in a moved-from
///           state.
void addOutlineInfo(OutlineInfo &&OI) {
  // A named rvalue reference is an lvalue: without the explicit std::move the
  // block list and the callback would be copied instead of moved.
  OutlineInfos.emplace_back(std::move(OI));
}
};

} // end namespace llvm
Expand Down
247 changes: 145 additions & 102 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Expand Up @@ -93,6 +93,57 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunction(RuntimeFunction FnID) {

void OpenMPIRBuilder::initialize() { initializeTypes(M); }

/// Outline all pending regions and run their post-outline callbacks.
///
/// Every entry in OutlineInfos is extracted into its own function (suffix
/// ".omp_par"), which is placed directly after the function that contained
/// the region. The artificial entry block created by the extractor is dropped
/// so that the region's own entry block becomes the entry of the outlined
/// function. Afterwards the list of pending regions is cleared, which makes it
/// possible to call this method more than once.
void OpenMPIRBuilder::finalize() {
  for (OutlineInfo &OI : OutlineInfos) {
    assert(!OI.Blocks.empty() &&
           "Outlined regions should have at least a single block!");
    // The first registered block is the entry of the region.
    BasicBlock *RegEntryBB = OI.Blocks.front();
    Function *OuterFn = RegEntryBB->getParent();
    CodeExtractorAnalysisCache CEAC(*OuterFn);
    CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr,
                            /* AggregateArgs */ false,
                            /* BlockFrequencyInfo */ nullptr,
                            /* BranchProbabilityInfo */ nullptr,
                            /* AssumptionCache */ nullptr,
                            /* AllowVarArgs */ true,
                            /* AllowAlloca */ true,
                            /* Suffix */ ".omp_par");

    LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");

    Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
    // The extractor signals failure with a null pointer. Check for it here,
    // before the result is dereferenced by the debug output and the asserts
    // below, rather than after the fact.
    assert(OutlinedFn && "Expected the OpenMP region to be outlined!");

    LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n");
    LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
    assert(OutlinedFn->getReturnType()->isVoidTy() &&
           "OpenMP outlined functions should not return a value!");

    // For compatibility with the clang CG we move the outlined function after
    // the one with the parallel region.
    OutlinedFn->removeFromParent();
    M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);

    // Remove the artificial entry introduced by the extractor right away, we
    // made our own entry block after all.
    {
      BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
      assert(ArtificialEntry.getUniqueSuccessor() == RegEntryBB &&
             "Artificial entry should only branch to the region entry!");
      assert(RegEntryBB->getUniquePredecessor() == &ArtificialEntry &&
             "Region entry should only be reached from the artificial entry!");
      // Make the region entry the first block before deleting the old one.
      RegEntryBB->moveBefore(&ArtificialEntry);
      ArtificialEntry.eraseFromParent();
    }
    assert(&OutlinedFn->getEntryBlock() == RegEntryBB &&
           "Region entry should be the entry of the outlined function!");
    // The only use is expected to be the call the extractor left behind.
    assert(OutlinedFn->getNumUses() == 1 &&
           "Outlined function should have exactly one use!");

    // Run a user callback, e.g. to add attributes.
    if (OI.PostOutlineCB)
      OI.PostOutlineCB(*OutlinedFn);
  }

  // Allow finalize to be called multiple times.
  OutlineInfos.clear();
}

Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
IdentFlag LocFlags) {
// Enable "C-mode".
Expand Down Expand Up @@ -415,32 +466,40 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
// PRegionExitBB <- A common exit to simplify block collection.
//

LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n");
LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");

// Let the caller create the body.
assert(BodyGenCB && "Expected body generation callback!");
InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB);

LLVM_DEBUG(dbgs() << "After body codegen: " << *UI->getFunction() << "\n");
LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");

OutlineInfo OI;
SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
SmallVector<BasicBlock *, 32> ParallelRegionBlocks, Worklist;
SmallVector<BasicBlock *, 32> Worklist;
ParallelRegionBlockSet.insert(PRegEntryBB);
ParallelRegionBlockSet.insert(PRegExitBB);

// Collect all blocks in-between PRegEntryBB and PRegExitBB.
Worklist.push_back(PRegEntryBB);
while (!Worklist.empty()) {
BasicBlock *BB = Worklist.pop_back_val();
ParallelRegionBlocks.push_back(BB);
OI.Blocks.push_back(BB);
for (BasicBlock *SuccBB : successors(BB))
if (ParallelRegionBlockSet.insert(SuccBB).second)
Worklist.push_back(SuccBB);
}

// Ensure a single exit node for the outlined region by creating one.
// We might have multiple incoming edges to the exit now due to finalizations,
// e.g., cancel calls that cause the control flow to leave the region.
BasicBlock *PRegOutlinedExitBB = PRegExitBB;
PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
OI.Blocks.push_back(PRegOutlinedExitBB);

CodeExtractorAnalysisCache CEAC(*OuterFn);
CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr,
CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr,
/* AggregateArgs */ false,
/* BlockFrequencyInfo */ nullptr,
/* BranchProbabilityInfo */ nullptr,
Expand All @@ -455,7 +514,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);

LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n");
LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");

FunctionCallee TIDRTLFn =
getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num);
Expand Down Expand Up @@ -491,61 +550,15 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
PrivHelper(*Input);
}
for (Value *Output : Outputs) {
LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
PrivHelper(*Output);
}
assert(Outputs.empty() &&
"OpenMP outlining should not produce live-out values!");

LLVM_DEBUG(dbgs() << "After privatization: " << *UI->getFunction() << "\n");
LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
LLVM_DEBUG({
for (auto *BB : ParallelRegionBlocks)
for (auto *BB : OI.Blocks)
dbgs() << " PBR: " << BB->getName() << "\n";
});

// Add some known attributes to the outlined function.
Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
OutlinedFn->addParamAttr(0, Attribute::NoAlias);
OutlinedFn->addParamAttr(1, Attribute::NoAlias);
OutlinedFn->addFnAttr(Attribute::NoUnwind);
OutlinedFn->addFnAttr(Attribute::NoRecurse);

LLVM_DEBUG(dbgs() << "After outlining: " << *UI->getFunction() << "\n");
LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");

// For compability with the clang CG we move the outlined function after the
// one with the parallel region.
OutlinedFn->removeFromParent();
M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);

// Remove the artificial entry introduced by the extractor right away, we
// made our own entry block after all.
{
BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB);
assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry);
PRegEntryBB->moveBefore(&ArtificialEntry);
ArtificialEntry.eraseFromParent();
}
LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n");
assert(&OutlinedFn->getEntryBlock() == PRegEntryBB);

assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
assert(OutlinedFn->arg_size() >= 2 &&
"Expected at least tid and bounded tid as arguments");
unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2;

CallInst *CI = cast<CallInst>(OutlinedFn->user_back());
CI->getParent()->setName("omp_parallel");
Builder.SetInsertPoint(CI);

// Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)};

SmallVector<Value *, 16> RealArgs;
RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());

FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call);
if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
Expand All @@ -558,75 +571,105 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
// callback callee.
F->addMetadata(
llvm::LLVMContext::MD_callback,
*llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2, {-1, -1},
/* VarArgsArePassed */ true)}));
*llvm::MDNode::get(
Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
/* VarArgsArePassed */ true)}));
}
}

Builder.CreateCall(RTLFn, RealArgs);
OI.PostOutlineCB = [=](Function &OutlinedFn) {
// Add some known attributes.
OutlinedFn.addParamAttr(0, Attribute::NoAlias);
OutlinedFn.addParamAttr(1, Attribute::NoAlias);
OutlinedFn.addFnAttr(Attribute::NoUnwind);
OutlinedFn.addFnAttr(Attribute::NoRecurse);

LLVM_DEBUG(dbgs() << "With fork_call placed: "
<< *Builder.GetInsertBlock()->getParent() << "\n");
assert(OutlinedFn.arg_size() >= 2 &&
"Expected at least tid and bounded tid as arguments");
unsigned NumCapturedVars =
OutlinedFn.arg_size() - /* tid & bounded tid */ 2;

InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
UI->eraseFromParent();
CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
CI->getParent()->setName("omp_parallel");
Builder.SetInsertPoint(CI);

// Initialize the local TID stack location with the argument value.
Builder.SetInsertPoint(PrivTID);
Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin();
Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);
// Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
Value *ForkCallArgs[] = {
Ident, Builder.getInt32(NumCapturedVars),
Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};

// If no "if" clause was present we do not need the call created during
// outlining, otherwise we reuse it in the serialized parallel region.
if (!ElseTI) {
CI->eraseFromParent();
} else {
SmallVector<Value *, 16> RealArgs;
RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());

// If an "if" clause was present we are now generating the serialized
// version into the "else" branch.
Builder.SetInsertPoint(ElseTI);
Builder.CreateCall(RTLFn, RealArgs);

// Build calls __kmpc_serialized_parallel(&Ident, GTid);
Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
Builder.CreateCall(
getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel),
SerializedParallelCallArgs);
LLVM_DEBUG(dbgs() << "With fork_call placed: "
<< *Builder.GetInsertBlock()->getParent() << "\n");

// OutlinedFn(&GTid, &zero, CapturedStruct);
CI->removeFromParent();
Builder.Insert(CI);
InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());

// __kmpc_end_serialized_parallel(&Ident, GTid);
Value *EndArgs[] = {Ident, ThreadID};
Builder.CreateCall(
getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel),
EndArgs);
// Initialize the local TID stack location with the argument value.
Builder.SetInsertPoint(PrivTID);
Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);

LLVM_DEBUG(dbgs() << "With serialized parallel region: "
<< *Builder.GetInsertBlock()->getParent() << "\n");
}
// If no "if" clause was present we do not need the call created during
// outlining, otherwise we reuse it in the serialized parallel region.
if (!ElseTI) {
CI->eraseFromParent();
} else {

// If an "if" clause was present we are now generating the serialized
// version into the "else" branch.
Builder.SetInsertPoint(ElseTI);

// Build calls __kmpc_serialized_parallel(&Ident, GTid);
Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
Builder.CreateCall(
getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel),
SerializedParallelCallArgs);

// OutlinedFn(&GTid, &zero, CapturedStruct);
CI->removeFromParent();
Builder.Insert(CI);

// __kmpc_end_serialized_parallel(&Ident, GTid);
Value *EndArgs[] = {Ident, ThreadID};
Builder.CreateCall(
getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel),
EndArgs);

LLVM_DEBUG(dbgs() << "With serialized parallel region: "
<< *Builder.GetInsertBlock()->getParent() << "\n");
}

for (Instruction *I : ToBeDeleted)
I->eraseFromParent();
};

// Adjust the finalization stack, verify the adjustment, and call the
// finalize function a last time to finalize values between the pre-fini block
// and the exit block if we left the parallel "the normal way".
// finalize function a last time to finalize values between the pre-fini
// block and the exit block if we left the parallel "the normal way".
auto FiniInfo = FinalizationStack.pop_back_val();
(void)FiniInfo;
assert(FiniInfo.DK == OMPD_parallel &&
"Unexpected finalization stack state!");

Instruction *PreFiniTI = PRegPreFiniBB->getTerminator();
assert(PreFiniTI->getNumSuccessors() == 1 &&
PreFiniTI->getSuccessor(0)->size() == 1 &&
isa<ReturnInst>(PreFiniTI->getSuccessor(0)->getTerminator()) &&
Instruction *PRegOutlinedExitTI = PRegOutlinedExitBB->getTerminator();
assert(PRegOutlinedExitTI->getNumSuccessors() == 1 &&
PRegOutlinedExitTI->getSuccessor(0) == PRegExitBB &&
"Unexpected CFG structure!");

InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator());
InsertPointTy PreFiniIP(PRegOutlinedExitBB,
PRegOutlinedExitTI->getIterator());
FiniCB(PreFiniIP);

for (Instruction *I : ToBeDeleted)
I->eraseFromParent();
InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
UI->eraseFromParent();

// Register the outlined info.
addOutlineInfo(std::move(OI));

return AfterIP;
}
Expand Down
6 changes: 1 addition & 5 deletions llvm/lib/Transforms/Utils/CodeExtractor.cpp
Expand Up @@ -1405,11 +1405,7 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
DISubprogram *OldSP = OldFunc.getSubprogram();
LLVMContext &Ctx = OldFunc.getContext();

// See llvm.org/PR44560, OpenMP passes an invalid subprogram to CodeExtractor.
bool NeedWorkaroundForOpenMPIRBuilderBug =
OldSP && OldSP->getRetainedNodes()->isTemporary();

if (!OldSP || NeedWorkaroundForOpenMPIRBuilderBug) {
if (!OldSP) {
// Erase any debug info the new function contains.
stripDebugInfo(NewFunc);
// Make sure the old function doesn't contain any non-local metadata refs.
Expand Down

0 comments on commit 70cac41

Please sign in to comment.