Skip to content

Commit

Permalink
[OpenMP][IRBuilder] Support allocas in nested parallel regions
Browse files Browse the repository at this point in the history
We need to keep track of the alloca insertion point (which we already
communicate via the callback to the user) as we place allocas as well.

Reviewed By: fghanim, SouraVX

Differential Revision: https://reviews.llvm.org/D82470
  • Loading branch information
jdoerfert committed Jul 30, 2020
1 parent 622e461 commit 19756ef
Show file tree
Hide file tree
Showing 4 changed files with 244 additions and 25 deletions.
8 changes: 5 additions & 3 deletions clang/lib/CodeGen/CGStmtOpenMP.cpp
Expand Up @@ -1707,9 +1707,11 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {

CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
Builder.restoreIP(OMPBuilder.CreateParallel(Builder, BodyGenCB, PrivCB,
FiniCB, IfCond, NumThreads,
ProcBind, S.hasCancel()));
llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
Builder.restoreIP(
OMPBuilder.CreateParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
IfCond, NumThreads, ProcBind, S.hasCancel()));
return;
}

Expand Down
10 changes: 6 additions & 4 deletions llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
Expand Up @@ -156,6 +156,7 @@ class OpenMPIRBuilder {
/// Generator for '#omp parallel'
///
/// \param Loc The insert and source location description.
/// \param AllocaIP The insertion points to be used for alloca instructions.
/// \param BodyGenCB Callback that will generate the region code.
/// \param PrivCB Callback to copy a given variable (think copy constructor).
/// \param FiniCB Callback to finalize variable copies.
Expand All @@ -166,10 +167,11 @@ class OpenMPIRBuilder {
///
/// \returns The insertion position *after* the parallel.
IRBuilder<>::InsertPoint
CreateParallel(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB,
Value *IfCondition, Value *NumThreads,
omp::ProcBindKind ProcBind, bool IsCancellable);
CreateParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
FinalizeCallbackTy FiniCB, Value *IfCondition,
Value *NumThreads, omp::ProcBindKind ProcBind,
bool IsCancellable);

/// Generator for '#omp flush'
///
Expand Down
25 changes: 16 additions & 9 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Expand Up @@ -394,9 +394,10 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(
}

IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition,
Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) {
const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
omp::ProcBindKind ProcBind, bool IsCancellable) {
if (!updateToLocation(Loc))
return Loc.IP;

Expand Down Expand Up @@ -429,7 +430,9 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
// we want to delete at the end.
SmallVector<Instruction *, 4> ToBeDeleted;

Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI());
// Change the location to the outer alloca insertion point to create and
// initialize the allocas we pass into the parallel region.
Builder.restoreIP(OuterAllocaIP);
AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");

Expand Down Expand Up @@ -481,9 +484,9 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(

// Generate the privatization allocas in the block that will become the entry
// of the outlined function.
InsertPointTy AllocaIP(PRegEntryBB,
PRegEntryBB->getTerminator()->getIterator());
Builder.restoreIP(AllocaIP);
Builder.SetInsertPoint(PRegEntryBB->getTerminator());
InsertPointTy InnerAllocaIP = Builder.saveIP();

AllocaInst *PrivTIDAddr =
Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid");
Expand Down Expand Up @@ -512,7 +515,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
// Let the caller create the body.
assert(BodyGenCB && "Expected body generation callback!");
InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB);
BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB);

LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");

Expand Down Expand Up @@ -671,7 +674,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
ReplacementValue = PrivTID;
} else {
Builder.restoreIP(
PrivCB(AllocaIP, Builder.saveIP(), V, ReplacementValue));
PrivCB(InnerAllocaIP, Builder.saveIP(), V, ReplacementValue));
assert(ReplacementValue &&
"Expected copy/create callback to set replacement value!");
if (ReplacementValue == &V)
Expand All @@ -686,6 +689,10 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
PrivHelper(*Input);
}
LLVM_DEBUG({
for (Value *Output : Outputs)
LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
});
assert(Outputs.empty() &&
"OpenMP outlining should not produce live-out values!");

Expand Down
226 changes: 217 additions & 9 deletions llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
Expand Up @@ -6,13 +6,14 @@
//
//===----------------------------------------------------------------------===//

#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "gtest/gtest.h"
Expand Down Expand Up @@ -360,9 +361,11 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {

auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };

IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
F->getEntryBlock().getFirstInsertionPt());
IRBuilder<>::InsertPoint AfterIP =
OMPBuilder.CreateParallel(Loc, BodyGenCB, PrivCB, FiniCB, nullptr,
nullptr, OMP_PROC_BIND_default, false);
OMPBuilder.CreateParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
nullptr, nullptr, OMP_PROC_BIND_default, false);
EXPECT_EQ(NumBodiesGenerated, 1U);
EXPECT_EQ(NumPrivatizedVars, 1U);
EXPECT_EQ(NumFinalizationPoints, 1U);
Expand Down Expand Up @@ -400,6 +403,205 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin());
}

TEST_F(OpenMPIRBuilderTest, ParallelNested) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
OMPBuilder.initialize();
F->setName("func");
IRBuilder<> Builder(BB);

OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

unsigned NumInnerBodiesGenerated = 0;
unsigned NumOuterBodiesGenerated = 0;
unsigned NumFinalizationPoints = 0;

auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
BasicBlock &ContinuationIP) {
++NumInnerBodiesGenerated;
};

auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
// Trivial copy (=firstprivate).
Builder.restoreIP(AllocaIP);
Type *VTy = VPtr.getType()->getPointerElementType();
Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
Builder.restoreIP(CodeGenIP);
Builder.CreateStore(V, ReplacementValue);
return CodeGenIP;
};

auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };

auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
BasicBlock &ContinuationIP) {
++NumOuterBodiesGenerated;
Builder.restoreIP(CodeGenIP);
BasicBlock *CGBB = CodeGenIP.getBlock();
BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint());
CGBB->getTerminator()->eraseFromParent();
;

IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB,
FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);

Builder.restoreIP(AfterIP);
Builder.CreateBr(NewBB);
};

IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
F->getEntryBlock().getFirstInsertionPt());
IRBuilder<>::InsertPoint AfterIP =
OMPBuilder.CreateParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB,
nullptr, nullptr, OMP_PROC_BIND_default, false);

EXPECT_EQ(NumInnerBodiesGenerated, 1U);
EXPECT_EQ(NumOuterBodiesGenerated, 1U);
EXPECT_EQ(NumFinalizationPoints, 2U);

Builder.restoreIP(AfterIP);
Builder.CreateRetVoid();

OMPBuilder.finalize();

EXPECT_EQ(M->size(), 5U);
for (Function &OutlinedFn : *M) {
if (F == &OutlinedFn || OutlinedFn.isDeclaration())
continue;
EXPECT_FALSE(verifyModule(*M, &errs()));
EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse));
EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));

EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
EXPECT_EQ(OutlinedFn.arg_size(), 2U);

EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
User *Usr = OutlinedFn.user_back();
ASSERT_TRUE(isa<ConstantExpr>(Usr));
CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back());
ASSERT_NE(ForkCI, nullptr);

EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
EXPECT_EQ(ForkCI->getNumArgOperands(), 3U);
EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
EXPECT_EQ(ForkCI->getArgOperand(1),
ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
}
}

TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
OMPBuilder.initialize();
F->setName("func");
IRBuilder<> Builder(BB);

OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

unsigned NumInnerBodiesGenerated = 0;
unsigned NumOuterBodiesGenerated = 0;
unsigned NumFinalizationPoints = 0;

auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
BasicBlock &ContinuationIP) {
++NumInnerBodiesGenerated;
};

auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
// Trivial copy (=firstprivate).
Builder.restoreIP(AllocaIP);
Type *VTy = VPtr.getType()->getPointerElementType();
Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
Builder.restoreIP(CodeGenIP);
Builder.CreateStore(V, ReplacementValue);
return CodeGenIP;
};

auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };

auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
BasicBlock &ContinuationIP) {
++NumOuterBodiesGenerated;
Builder.restoreIP(CodeGenIP);
BasicBlock *CGBB = CodeGenIP.getBlock();
BasicBlock *NewBB1 = SplitBlock(CGBB, &*CodeGenIP.getPoint());
BasicBlock *NewBB2 = SplitBlock(NewBB1, &*NewBB1->getFirstInsertionPt());
CGBB->getTerminator()->eraseFromParent();
;
NewBB1->getTerminator()->eraseFromParent();
;

IRBuilder<>::InsertPoint AfterIP1 = OMPBuilder.CreateParallel(
InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB,
FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);

Builder.restoreIP(AfterIP1);
Builder.CreateBr(NewBB1);

IRBuilder<>::InsertPoint AfterIP2 = OMPBuilder.CreateParallel(
InsertPointTy(NewBB1, NewBB1->end()), AllocaIP, InnerBodyGenCB, PrivCB,
FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);

Builder.restoreIP(AfterIP2);
Builder.CreateBr(NewBB2);
};

IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
F->getEntryBlock().getFirstInsertionPt());
IRBuilder<>::InsertPoint AfterIP =
OMPBuilder.CreateParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB,
nullptr, nullptr, OMP_PROC_BIND_default, false);

EXPECT_EQ(NumInnerBodiesGenerated, 2U);
EXPECT_EQ(NumOuterBodiesGenerated, 1U);
EXPECT_EQ(NumFinalizationPoints, 3U);

Builder.restoreIP(AfterIP);
Builder.CreateRetVoid();

OMPBuilder.finalize();

EXPECT_EQ(M->size(), 6U);
for (Function &OutlinedFn : *M) {
if (F == &OutlinedFn || OutlinedFn.isDeclaration())
continue;
EXPECT_FALSE(verifyModule(*M, &errs()));
EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoUnwind));
EXPECT_TRUE(OutlinedFn.hasFnAttribute(Attribute::NoRecurse));
EXPECT_TRUE(OutlinedFn.hasParamAttribute(0, Attribute::NoAlias));
EXPECT_TRUE(OutlinedFn.hasParamAttribute(1, Attribute::NoAlias));

EXPECT_TRUE(OutlinedFn.hasInternalLinkage());
EXPECT_EQ(OutlinedFn.arg_size(), 2U);

unsigned NumAllocas = 0;
for (Instruction &I : instructions(OutlinedFn))
NumAllocas += isa<AllocaInst>(I);
EXPECT_EQ(NumAllocas, 1U);

EXPECT_EQ(OutlinedFn.getNumUses(), 1U);
User *Usr = OutlinedFn.user_back();
ASSERT_TRUE(isa<ConstantExpr>(Usr));
CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back());
ASSERT_NE(ForkCI, nullptr);

EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
EXPECT_EQ(ForkCI->getNumArgOperands(), 3U);
EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
EXPECT_EQ(ForkCI->getArgOperand(1),
ConstantInt::get(Type::getInt32Ty(Ctx), 0U));
EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
}
}

TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
Expand Down Expand Up @@ -460,9 +662,12 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
// No destructors.
};

IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
nullptr, OMP_PROC_BIND_default, false);
IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
F->getEntryBlock().getFirstInsertionPt());
IRBuilder<>::InsertPoint AfterIP =
OMPBuilder.CreateParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
Builder.CreateIsNotNull(F->arg_begin()),
nullptr, OMP_PROC_BIND_default, false);

EXPECT_EQ(NumBodiesGenerated, 1U);
EXPECT_EQ(NumPrivatizedVars, 1U);
Expand Down Expand Up @@ -585,9 +790,12 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
{Builder.getInt32(NumFinalizationPoints)});
};

IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
nullptr, OMP_PROC_BIND_default, true);
IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
F->getEntryBlock().getFirstInsertionPt());
IRBuilder<>::InsertPoint AfterIP =
OMPBuilder.CreateParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
Builder.CreateIsNotNull(F->arg_begin()),
nullptr, OMP_PROC_BIND_default, true);

EXPECT_EQ(NumBodiesGenerated, 1U);
EXPECT_EQ(NumPrivatizedVars, 0U);
Expand Down

0 comments on commit 19756ef

Please sign in to comment.