Skip to content

Commit

Permalink
[OPENMP]Reduce number of captured global vars.
Browse files Browse the repository at this point in the history
Try to reduce the number of global vars captured in the OpenMP regions
by capturing them only the regions, which mark them as not-shared.
  • Loading branch information
alexey-bataev committed Mar 13, 2020
1 parent 6bbc173 commit 172f146
Show file tree
Hide file tree
Showing 8 changed files with 62 additions and 12 deletions.
7 changes: 7 additions & 0 deletions clang/include/clang/Sema/Sema.h
Expand Up @@ -9858,6 +9858,13 @@ class Sema final {
bool isOpenMPTargetCapturedDecl(const ValueDecl *D, unsigned Level,
unsigned CaptureLevel) const;

/// Check if the specified global variable must be captured by outer capture
/// regions.
/// \param Level Relative level of nested OpenMP construct for that
/// the check is performed.
bool isOpenMPGlobalCapturedDecl(ValueDecl *D, unsigned Level,
unsigned CaptureLevel) const;

ExprResult PerformOpenMPImplicitIntegerConversion(SourceLocation OpLoc,
Expr *Op);
/// Called on start of new data sharing attribute block.
Expand Down
9 changes: 7 additions & 2 deletions clang/lib/CodeGen/CGStmtOpenMP.cpp
Expand Up @@ -57,7 +57,8 @@ class OMPLexicalScope : public CodeGenFunction::LexicalScope {
static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
return CGF.LambdaCaptureFields.lookup(VD) ||
(CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
(CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
(CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
}

public:
Expand Down Expand Up @@ -5551,7 +5552,11 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
// Emit outlined function for task construct.
const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
Address CapturedStruct = Address::invalid();
{
OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
CapturedStruct = GenerateCapturedStmtArgument(*CS);
}
QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
const Expr *IfCond = nullptr;
for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
Expand Down
8 changes: 7 additions & 1 deletion clang/lib/Sema/SemaExpr.cpp
Expand Up @@ -16523,13 +16523,19 @@ bool Sema::tryCaptureVariable(
!IsOpenMPPrivateDecl &&
isOpenMPTargetCapturedDecl(Var, RSI->OpenMPLevel,
RSI->OpenMPCaptureLevel);
// Do not capture global if it is not privatized in outer regions.
bool IsGlobalCap =
IsGlobal && isOpenMPGlobalCapturedDecl(Var, RSI->OpenMPLevel,
RSI->OpenMPCaptureLevel);

// When we detect target captures we are looking from inside the
// target region, therefore we need to propagate the capture from the
// enclosing region. Therefore, the capture is not initially nested.
if (IsTargetCap)
adjustOpenMPTargetScopeIndex(FunctionScopesIndex, RSI->OpenMPLevel);

if (IsTargetCap || IsOpenMPPrivateDecl) {
if (IsTargetCap || IsOpenMPPrivateDecl ||
(IsGlobal && !IsGlobalCap)) {
Nested = !IsTargetCap;
DeclRefType = DeclRefType.getUnqualifiedType();
CaptureType = Context.getLValueReferenceType(DeclRefType);
Expand Down
42 changes: 37 additions & 5 deletions clang/lib/Sema/SemaOpenMP.cpp
Expand Up @@ -498,6 +498,8 @@ class DSAStackTy {
const DSAVarData getTopDSA(ValueDecl *D, bool FromParent);
/// Returns data-sharing attributes for the specified declaration.
const DSAVarData getImplicitDSA(ValueDecl *D, bool FromParent) const;
/// Returns data-sharing attributes for the specified declaration.
const DSAVarData getImplicitDSA(ValueDecl *D, unsigned Level) const;
/// Checks if the specified variables has data-sharing attributes which
/// match specified \a CPred predicate in any directive which matches \a DPred
/// predicate.
Expand Down Expand Up @@ -1552,6 +1554,15 @@ const DSAStackTy::DSAVarData DSAStackTy::getImplicitDSA(ValueDecl *D,
return getDSA(StartI, D);
}

const DSAStackTy::DSAVarData DSAStackTy::getImplicitDSA(ValueDecl *D,
unsigned Level) const {
if (getStackSize() <= Level)
return DSAVarData();
D = getCanonicalDecl(D);
const_iterator StartI = std::next(begin(), getStackSize() - 1 - Level);
return getDSA(StartI, D);
}

const DSAStackTy::DSAVarData
DSAStackTy::hasDSA(ValueDecl *D,
const llvm::function_ref<bool(OpenMPClauseKind)> CPred,
Expand Down Expand Up @@ -2108,9 +2119,7 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo,

void Sema::adjustOpenMPTargetScopeIndex(unsigned &FunctionScopesIndex,
unsigned Level) const {
SmallVector<OpenMPDirectiveKind, 4> Regions;
getOpenMPCaptureRegions(Regions, DSAStack->getDirective(Level));
FunctionScopesIndex -= Regions.size();
FunctionScopesIndex -= getOpenMPCaptureLevels(DSAStack->getDirective(Level));
}

void Sema::startOpenMPLoop() {
Expand Down Expand Up @@ -2213,6 +2222,29 @@ bool Sema::isOpenMPTargetCapturedDecl(const ValueDecl *D, unsigned Level,
Regions[CaptureLevel] != OMPD_task;
}

bool Sema::isOpenMPGlobalCapturedDecl(ValueDecl *D, unsigned Level,
unsigned CaptureLevel) const {
assert(LangOpts.OpenMP && "OpenMP is not allowed");
// Return true if the current level is no longer enclosed in a target region.

if (const auto *VD = dyn_cast<VarDecl>(D)) {
if (!VD->hasLocalStorage()) {
DSAStackTy::DSAVarData TopDVar =
DSAStack->getTopDSA(D, /*FromParent=*/false);
unsigned NumLevels =
getOpenMPCaptureLevels(DSAStack->getDirective(Level));
if (Level == 0)
return (NumLevels == CaptureLevel + 1) && TopDVar.CKind != OMPC_shared;
DSAStackTy::DSAVarData DVar = DSAStack->getImplicitDSA(D, Level - 1);
return DVar.CKind != OMPC_shared ||
isOpenMPGlobalCapturedDecl(
D, Level - 1,
getOpenMPCaptureLevels(DSAStack->getDirective(Level - 1)) - 1);
}
}
return true;
}

void Sema::DestroyDataSharingAttributesStack() { delete DSAStack; }

void Sema::finalizeOpenMPDelayedAnalysis() {
Expand Down Expand Up @@ -3575,7 +3607,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
};
// Start a captured region for 'parallel'.
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
ParamsParallel, /*OpenMPCaptureLevel=*/1);
ParamsParallel, /*OpenMPCaptureLevel=*/0);
QualType Args[] = {VoidPtrTy};
FunctionProtoType::ExtProtoInfo EPI;
EPI.Variadic = true;
Expand All @@ -3596,7 +3628,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
std::make_pair(StringRef(), QualType()) // __context with shared vars
};
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
Params, /*OpenMPCaptureLevel=*/2);
Params, /*OpenMPCaptureLevel=*/1);
// Mark this captured region as inlined, because we don't use outlined
// function directly.
getCurCapturedRegion()->TheCapturedDecl->addAttr(
Expand Down
Expand Up @@ -206,7 +206,7 @@ int main() {
// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
// CHECK: store [[S_DOUBLE_TY]]* %{{.+}}, [[S_DOUBLE_TY]]** [[VAR_REF]],
// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 5
// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* %{{.+}},
// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* @{{.+}},
// CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_REF]],

// Allocate task.
Expand Down
Expand Up @@ -187,7 +187,7 @@ int main() {
// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
// CHECK: store [[S_DOUBLE_TY]]* [[VAR_ADDR:%.+]], [[S_DOUBLE_TY]]** [[VAR_REF]],
// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
// CHECK: store i{{[0-9]+}}* [[SIVAR:%.+]], i{{[0-9]+}}** [[SIVAR_REF]],
// CHECK: store i{{[0-9]+}}* [[SIVAR:@.+]], i{{[0-9]+}}** [[SIVAR_REF]],

// Allocate task.
// Returns struct kmp_task_t {
Expand Down
Expand Up @@ -206,7 +206,7 @@ int main() {
// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
// CHECK: store [[S_DOUBLE_TY]]* %{{.+}}, [[S_DOUBLE_TY]]** [[VAR_REF]],
// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 5
// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* %{{.+}},
// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* @{{.+}},
// CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_REF]],

// Allocate task.
Expand Down
Expand Up @@ -187,7 +187,7 @@ int main() {
// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
// CHECK: store [[S_DOUBLE_TY]]* [[VAR_ADDR:%.+]], [[S_DOUBLE_TY]]** [[VAR_REF]],
// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
// CHECK: store i{{[0-9]+}}* [[SIVAR:%.+]], i{{[0-9]+}}** [[SIVAR_REF]],
// CHECK: store i{{[0-9]+}}* [[SIVAR:@.+]], i{{[0-9]+}}** [[SIVAR_REF]],

// Allocate task.
// Returns struct kmp_task_t {
Expand Down

0 comments on commit 172f146

Please sign in to comment.