[OPENMP]Fix PR37671: Privatize local(private) variables in untied tasks.
In untied tasks, the space for local variables declared in the task region
must be allocated when the memory for the task data is allocated: the
function can be interrupted, and we can leave it at an untied task switch,
so the state of those local variables has to be preserved across switches.
Also, the compiler should not run cleanups when leaving at an untied task
switch; cleanups must only run when execution really exits the declaration
scope.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D84457
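
For illustration only (this is a sketch of the affected pattern, not a test from the patch), the kind of code this change targets looks like the following when built with -fopenmp: a variable declared inside an untied task must keep its value across a task-switching point, and its cleanup must not run at the switch.

```cpp
#include <cstdio>

int main() {
  int Result = 0;
#pragma omp parallel
#pragma omp single
#pragma omp task untied shared(Result)
  {
    // 'Local' is declared in the task region. With an untied task the
    // executing thread may suspend at the taskyield below and a different
    // thread may resume the task, so 'Local' has to live in the task's
    // private data rather than on the original thread's stack.
    int Local = 42;
#pragma omp taskyield
    Result = Local; // must still observe 42 after the task switch
  }
  std::printf("%d\n", Result);
  return 0;
}
```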
alexey-bataev committed Aug 12, 2020
1 parent 3c8a4ee commit f4f3f67
Showing 4 changed files with 233 additions and 76 deletions.
192 changes: 120 additions & 72 deletions clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -180,7 +180,7 @@ class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
UntiedCodeGen(CGF);
CodeGenFunction::JumpDest CurPoint =
CGF.getJumpDestInCurrentScope(".untied.next.");
CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
CGF.EmitBranch(CGF.ReturnBlock.getBlock());
CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
CGF.Builder.GetInsertBlock());
@@ -3370,10 +3370,14 @@ struct PrivateHelpersTy {
const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
: OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
PrivateElemInit(PrivateElemInit) {}
PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
const Expr *OriginalRef = nullptr;
const VarDecl *Original = nullptr;
const VarDecl *PrivateCopy = nullptr;
const VarDecl *PrivateElemInit = nullptr;
bool isLocalPrivate() const {
return !OriginalRef && !PrivateCopy && !PrivateElemInit;
}
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
@@ -3390,6 +3394,11 @@ createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
for (const auto &Pair : Privates) {
const VarDecl *VD = Pair.second.Original;
QualType Type = VD->getType().getNonReferenceType();
// If the private variable is a local variable with lvalue ref type,
// allocate the pointer instead of the pointee type.
if (Pair.second.isLocalPrivate() &&
VD->getType()->isLValueReferenceType())
Type = C.getPointerType(Type);
FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
if (VD->hasAttrs()) {
for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
@@ -3643,10 +3652,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
ArrayRef<const Expr *> PrivateVars,
ArrayRef<const Expr *> FirstprivateVars,
ArrayRef<const Expr *> LastprivateVars,
QualType PrivatesQTy,
const OMPTaskDataTy &Data, QualType PrivatesQTy,
ArrayRef<PrivateDataTy> Privates) {
ASTContext &C = CGM.getContext();
FunctionArgList Args;
@@ -3655,9 +3661,9 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
C.getPointerType(PrivatesQTy).withConst().withRestrict(),
ImplicitParamDecl::Other);
Args.push_back(&TaskPrivatesArg);
llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
unsigned Counter = 1;
for (const Expr *E : PrivateVars) {
for (const Expr *E : Data.PrivateVars) {
Args.push_back(ImplicitParamDecl::Create(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getPointerType(C.getPointerType(E->getType()))
@@ -3668,7 +3674,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
PrivateVarsPos[VD] = Counter;
++Counter;
}
for (const Expr *E : FirstprivateVars) {
for (const Expr *E : Data.FirstprivateVars) {
Args.push_back(ImplicitParamDecl::Create(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getPointerType(C.getPointerType(E->getType()))
@@ -3679,7 +3685,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
PrivateVarsPos[VD] = Counter;
++Counter;
}
for (const Expr *E : LastprivateVars) {
for (const Expr *E : Data.LastprivateVars) {
Args.push_back(ImplicitParamDecl::Create(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getPointerType(C.getPointerType(E->getType()))
@@ -3690,6 +3696,17 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
PrivateVarsPos[VD] = Counter;
++Counter;
}
for (const VarDecl *VD : Data.PrivateLocals) {
QualType Ty = VD->getType().getNonReferenceType();
if (VD->getType()->isLValueReferenceType())
Ty = C.getPointerType(Ty);
Args.push_back(ImplicitParamDecl::Create(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
ImplicitParamDecl::Other));
PrivateVarsPos[VD] = Counter;
++Counter;
}
const auto &TaskPrivatesMapFnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
llvm::FunctionType *TaskPrivatesMapTy =
@@ -3762,6 +3779,9 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
}
FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
for (const PrivateDataTy &Pair : Privates) {
// Do not initialize private locals.
if (Pair.second.isLocalPrivate())
continue;
const VarDecl *VD = Pair.second.PrivateCopy;
const Expr *Init = VD->getAnyInitializer();
if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
@@ -3852,6 +3872,8 @@ static bool checkInitIsRequired(CodeGenFunction &CGF,
ArrayRef<PrivateDataTy> Privates) {
bool InitRequired = false;
for (const PrivateDataTy &Pair : Privates) {
if (Pair.second.isLocalPrivate())
continue;
const VarDecl *VD = Pair.second.PrivateCopy;
const Expr *Init = VD->getAnyInitializer();
InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
@@ -3945,16 +3967,16 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
bool NeedsCleanup = false;
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
for (const FieldDecl *FD : PrivateRD->fields()) {
NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
if (NeedsCleanup)
break;
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
ArrayRef<PrivateDataTy> Privates) {
for (const PrivateDataTy &P : Privates) {
if (P.second.isLocalPrivate())
continue;
QualType Ty = P.second.Original->getType().getNonReferenceType();
if (Ty.isDestructedType())
return true;
}
return NeedsCleanup;
return false;
}

namespace {
@@ -4124,9 +4146,12 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
/*PrivateElemInit=*/nullptr));
++I;
}
llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
return L.first > R.first;
});
for (const VarDecl *VD : Data.PrivateLocals)
Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
llvm::stable_sort(Privates,
[](const PrivateDataTy &L, const PrivateDataTy &R) {
return L.first > R.first;
});
QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
// Build type kmp_routine_entry_t (if not built yet).
emitKmpRoutineEntryT(KmpInt32Ty);
@@ -4168,9 +4193,8 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
std::next(TaskFunction->arg_begin(), 3)->getType();
if (!Privates.empty()) {
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
TaskPrivatesMap = emitTaskPrivateMappingFunction(
CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
FI->getType(), Privates);
TaskPrivatesMap =
emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
TaskPrivatesMap, TaskPrivatesMapTy);
} else {
@@ -4200,7 +4224,8 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
unsigned Flags = Data.Tied ? TiedFlag : 0;
bool NeedsCleanup = false;
if (!Privates.empty()) {
NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
NeedsCleanup =
checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
if (NeedsCleanup)
Flags = Flags | DestructorsFlag;
}
@@ -11231,56 +11256,64 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
if (!VD)
return Address::invalid();
const VarDecl *CVD = VD->getCanonicalDecl();
if (!CVD->hasAttr<OMPAllocateDeclAttr>())
if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
// Use the default allocation.
if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
!AA->getAllocator())
return Address::invalid();
llvm::Value *Size;
CharUnits Align = CGM.getContext().getDeclAlign(CVD);
if (CVD->getType()->isVariablyModifiedType()) {
Size = CGF.getTypeSize(CVD->getType());
// Align the size: ((size + align - 1) / align) * align
Size = CGF.Builder.CreateNUWAdd(
Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
} else {
CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
Size = CGM.getSize(Sz.alignTo(Align));
}
llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
assert(AA->getAllocator() &&
"Expected allocator expression for non-default allocator.");
llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
// According to the standard, the original allocator type is an enum
// (integer). Convert to pointer type, if required.
if (Allocator->getType()->isIntegerTy())
Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
else if (Allocator->getType()->isPointerTy())
Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Allocator, CGM.VoidPtrTy);
llvm::Value *Args[] = {ThreadID, Size, Allocator};

llvm::Value *Addr =
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_alloc),
Args, getName({CVD->getName(), ".void.addr"}));
llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
Allocator};
llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_free);

CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
llvm::makeArrayRef(FiniArgs));
Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Addr,
CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
getName({CVD->getName(), ".addr"}));
return Address(Addr, Align);
}
if (UntiedLocalVarsStack.empty())
return Address::invalid();
const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
// Use the default allocation.
if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
!AA->getAllocator())
const UntiedLocalVarsAddressesMap &UntiedData = UntiedLocalVarsStack.back();
auto It = UntiedData.find(VD);
if (It == UntiedData.end())
return Address::invalid();
llvm::Value *Size;
CharUnits Align = CGM.getContext().getDeclAlign(CVD);
if (CVD->getType()->isVariablyModifiedType()) {
Size = CGF.getTypeSize(CVD->getType());
// Align the size: ((size + align - 1) / align) * align
Size = CGF.Builder.CreateNUWAdd(
Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
} else {
CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
Size = CGM.getSize(Sz.alignTo(Align));
}
llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
assert(AA->getAllocator() &&
"Expected allocator expression for non-default allocator.");
llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
// According to the standard, the original allocator type is an enum (integer).
// Convert to pointer type, if required.
if (Allocator->getType()->isIntegerTy())
Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
else if (Allocator->getType()->isPointerTy())
Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
CGM.VoidPtrTy);
llvm::Value *Args[] = {ThreadID, Size, Allocator};

llvm::Value *Addr =
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_alloc),
Args, getName({CVD->getName(), ".void.addr"}));
llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
Allocator};
llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_free);

CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
llvm::makeArrayRef(FiniArgs));
Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Addr,
CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
getName({CVD->getName(), ".addr"}));
return Address(Addr, Align);
return It->second;
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
@@ -11315,6 +11348,21 @@ CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
CodeGenModule &CGM,
const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address> &LocalVars)
: CGM(CGM), NeedToPush(!LocalVars.empty()) {
if (!NeedToPush)
return;
CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
if (!NeedToPush)
return;
CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

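As an aside, the sizing logic in getAddressOfLocalVariable above rounds a variably-sized allocation up to the declared alignment via ((size + align - 1) / align) * align before calling __kmpc_alloc. A standalone sketch of that arithmetic (plain C++ with a hypothetical roundUpToAlign helper, not the IRBuilder calls used in the patch):

```cpp
#include <cstdint>
#include <cstdio>

// Mirrors the emitted IR: ((size + align - 1) / align) * align.
static uint64_t roundUpToAlign(uint64_t Size, uint64_t Align) {
  return ((Size + Align - 1) / Align) * Align;
}

int main() {
  std::printf("%llu\n", (unsigned long long)roundUpToAlign(13, 8)); // prints 16
  std::printf("%llu\n", (unsigned long long)roundUpToAlign(16, 8)); // prints 16
  return 0;
}
```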
18 changes: 18 additions & 0 deletions clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -105,6 +105,7 @@ struct OMPTaskDataTy final {
SmallVector<const Expr *, 4> ReductionOrigs;
SmallVector<const Expr *, 4> ReductionCopies;
SmallVector<const Expr *, 4> ReductionOps;
SmallVector<CanonicalDeclPtr<const VarDecl>, 4> PrivateLocals;
struct DependData {
OpenMPDependClauseKind DepKind = OMPC_DEPEND_unknown;
const Expr *IteratorExpr = nullptr;
@@ -245,6 +246,19 @@ class CGOpenMPRuntime {
~NontemporalDeclsRAII();
};

/// Manages the list of addresses of local variables in untied tasks for the specified directive.
class UntiedTaskLocalDeclsRAII {
CodeGenModule &CGM;
const bool NeedToPush;

public:
UntiedTaskLocalDeclsRAII(
CodeGenModule &CGM,
const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address>
&LocalVars);
~UntiedTaskLocalDeclsRAII();
};

/// Maps the expression for the lastprivate variable to the global copy used
/// to store new value because original variables are not mapped in inner
/// parallel regions. Only private copies are captured but we need also to
@@ -705,6 +719,10 @@ class CGOpenMPRuntime {
/// The set is the union of all current stack elements.
llvm::SmallVector<NontemporalDeclsSet, 4> NontemporalDeclsStack;

using UntiedLocalVarsAddressesMap =
llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address>;
llvm::SmallVector<UntiedLocalVarsAddressesMap, 4> UntiedLocalVarsStack;

/// Stack for list of addresses of declarations in current context marked as
/// lastprivate conditional. The set is the union of all current stack
/// elements.
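
The new UntiedLocalVarsAddressesMap / UntiedTaskLocalDeclsRAII pair follows the same push-on-entry, pop-on-exit pattern as the existing NontemporalDeclsRAII: a map from each untied-task local to its pre-allocated address is pushed (presumably by the task-emission code, whose call site is not part of this diff), and getAddressOfLocalVariable consults the top of that stack. A self-contained sketch of the pattern under those assumptions, with hypothetical names, std::map instead of llvm::DenseMap, and void* instead of Address:

```cpp
#include <cassert>
#include <map>
#include <string>
#include <vector>

// Hypothetical stand-in for the CanonicalDeclPtr<const VarDecl> -> Address map.
using LocalVarsMap = std::map<std::string, void *>;

struct Runtime {
  std::vector<LocalVarsMap> UntiedLocalVarsStack;

  // Returns the pre-allocated address if the variable is a local of the
  // current untied task, or nullptr (the "Address::invalid()" case) otherwise.
  void *getAddressOfLocalVariable(const std::string &Var) const {
    if (UntiedLocalVarsStack.empty())
      return nullptr;
    const LocalVarsMap &Top = UntiedLocalVarsStack.back();
    auto It = Top.find(Var);
    return It == Top.end() ? nullptr : It->second;
  }
};

// Scoped push/pop, mirroring UntiedTaskLocalDeclsRAII.
class UntiedTaskLocalDeclsRAII {
  Runtime &RT;
  const bool NeedToPush;

public:
  UntiedTaskLocalDeclsRAII(Runtime &RT, const LocalVarsMap &LocalVars)
      : RT(RT), NeedToPush(!LocalVars.empty()) {
    if (NeedToPush)
      RT.UntiedLocalVarsStack.push_back(LocalVars);
  }
  ~UntiedTaskLocalDeclsRAII() {
    if (NeedToPush)
      RT.UntiedLocalVarsStack.pop_back();
  }
};

int main() {
  Runtime RT;
  int Slot = 0; // stands in for the variable's slot in the task's private data
  {
    UntiedTaskLocalDeclsRAII Scope(RT, {{"Local", &Slot}});
    assert(RT.getAddressOfLocalVariable("Local") == &Slot);
  }
  assert(RT.getAddressOfLocalVariable("Local") == nullptr);
  return 0;
}
```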
