From 172f1460ae05ab5c33c757142c8bdb10acfbdbe1 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 12 Mar 2020 12:52:02 -0400 Subject: [PATCH] [OPENMP]Reduce number of captured global vars. Try to reduce the number of global vars captured in the OpenMP regions by capturing them only in the regions that mark them as not-shared. --- clang/include/clang/Sema/Sema.h | 7 ++++ clang/lib/CodeGen/CGStmtOpenMP.cpp | 9 +++- clang/lib/Sema/SemaExpr.cpp | 8 +++- clang/lib/Sema/SemaOpenMP.cpp | 42 ++++++++++++++++--- ...l_master_taskloop_firstprivate_codegen.cpp | 2 +- ...el_master_taskloop_lastprivate_codegen.cpp | 2 +- ...ter_taskloop_simd_firstprivate_codegen.cpp | 2 +- ...ster_taskloop_simd_lastprivate_codegen.cpp | 2 +- 8 files changed, 62 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index ee02d3189816d..ead15bf7da2df 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -9858,6 +9858,13 @@ class Sema final { bool isOpenMPTargetCapturedDecl(const ValueDecl *D, unsigned Level, unsigned CaptureLevel) const; + /// Check if the specified global variable must be captured by outer capture + /// regions. + /// \param Level Relative level of nested OpenMP construct for that + /// the check is performed. + bool isOpenMPGlobalCapturedDecl(ValueDecl *D, unsigned Level, + unsigned CaptureLevel) const; + ExprResult PerformOpenMPImplicitIntegerConversion(SourceLocation OpLoc, Expr *Op); /// Called on start of new data sharing attribute block.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 55057c19df4fd..83807f609e90e 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -57,7 +57,8 @@ class OMPLexicalScope : public CodeGenFunction::LexicalScope { static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { return CGF.LambdaCaptureFields.lookup(VD) || (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || - (CGF.CurCodeDecl && isa(CGF.CurCodeDecl)); + (CGF.CurCodeDecl && isa(CGF.CurCodeDecl) && + cast(CGF.CurCodeDecl)->capturesVariable(VD)); } public: @@ -5551,7 +5552,11 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); // Emit outlined function for task construct. const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop); - Address CapturedStruct = GenerateCapturedStmtArgument(*CS); + Address CapturedStruct = Address::invalid(); + { + OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); + CapturedStruct = GenerateCapturedStmtArgument(*CS); + } QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); const Expr *IfCond = nullptr; for (const auto *C : S.getClausesOfKind()) { diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index b2fe4fe2c5885..8cc5ac2677742 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -16523,13 +16523,19 @@ bool Sema::tryCaptureVariable( !IsOpenMPPrivateDecl && isOpenMPTargetCapturedDecl(Var, RSI->OpenMPLevel, RSI->OpenMPCaptureLevel); + // Do not capture global if it is not privatized in outer regions. + bool IsGlobalCap = + IsGlobal && isOpenMPGlobalCapturedDecl(Var, RSI->OpenMPLevel, + RSI->OpenMPCaptureLevel); + // When we detect target captures we are looking from inside the // target region, therefore we need to propagate the capture from the // enclosing region. 
Therefore, the capture is not initially nested. if (IsTargetCap) adjustOpenMPTargetScopeIndex(FunctionScopesIndex, RSI->OpenMPLevel); - if (IsTargetCap || IsOpenMPPrivateDecl) { + if (IsTargetCap || IsOpenMPPrivateDecl || + (IsGlobal && !IsGlobalCap)) { Nested = !IsTargetCap; DeclRefType = DeclRefType.getUnqualifiedType(); CaptureType = Context.getLValueReferenceType(DeclRefType); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 89b7c0b1cd0d9..a3506bf046f72 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -498,6 +498,8 @@ class DSAStackTy { const DSAVarData getTopDSA(ValueDecl *D, bool FromParent); /// Returns data-sharing attributes for the specified declaration. const DSAVarData getImplicitDSA(ValueDecl *D, bool FromParent) const; + /// Returns data-sharing attributes for the specified declaration. + const DSAVarData getImplicitDSA(ValueDecl *D, unsigned Level) const; /// Checks if the specified variables has data-sharing attributes which /// match specified \a CPred predicate in any directive which matches \a DPred /// predicate. 
@@ -1552,6 +1554,15 @@ const DSAStackTy::DSAVarData DSAStackTy::getImplicitDSA(ValueDecl *D, return getDSA(StartI, D); } +const DSAStackTy::DSAVarData DSAStackTy::getImplicitDSA(ValueDecl *D, + unsigned Level) const { + if (getStackSize() <= Level) + return DSAVarData(); + D = getCanonicalDecl(D); + const_iterator StartI = std::next(begin(), getStackSize() - 1 - Level); + return getDSA(StartI, D); +} + const DSAStackTy::DSAVarData DSAStackTy::hasDSA(ValueDecl *D, const llvm::function_ref CPred, @@ -2108,9 +2119,7 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo, void Sema::adjustOpenMPTargetScopeIndex(unsigned &FunctionScopesIndex, unsigned Level) const { - SmallVector Regions; - getOpenMPCaptureRegions(Regions, DSAStack->getDirective(Level)); - FunctionScopesIndex -= Regions.size(); + FunctionScopesIndex -= getOpenMPCaptureLevels(DSAStack->getDirective(Level)); } void Sema::startOpenMPLoop() { @@ -2213,6 +2222,29 @@ bool Sema::isOpenMPTargetCapturedDecl(const ValueDecl *D, unsigned Level, Regions[CaptureLevel] != OMPD_task; } +bool Sema::isOpenMPGlobalCapturedDecl(ValueDecl *D, unsigned Level, + unsigned CaptureLevel) const { + assert(LangOpts.OpenMP && "OpenMP is not allowed"); + // Non-global declarations are always captured; a global is captured only if the checks below find a level where it is not shared.
+ + if (const auto *VD = dyn_cast(D)) { + if (!VD->hasLocalStorage()) { + DSAStackTy::DSAVarData TopDVar = + DSAStack->getTopDSA(D, /*FromParent=*/false); + unsigned NumLevels = + getOpenMPCaptureLevels(DSAStack->getDirective(Level)); + if (Level == 0) + return (NumLevels == CaptureLevel + 1) && TopDVar.CKind != OMPC_shared; + DSAStackTy::DSAVarData DVar = DSAStack->getImplicitDSA(D, Level - 1); + return DVar.CKind != OMPC_shared || + isOpenMPGlobalCapturedDecl( + D, Level - 1, + getOpenMPCaptureLevels(DSAStack->getDirective(Level - 1)) - 1); + } + } + return true; +} + void Sema::DestroyDataSharingAttributesStack() { delete DSAStack; } void Sema::finalizeOpenMPDelayedAnalysis() { @@ -3575,7 +3607,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { }; // Start a captured region for 'parallel'. ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - ParamsParallel, /*OpenMPCaptureLevel=*/1); + ParamsParallel, /*OpenMPCaptureLevel=*/0); QualType Args[] = {VoidPtrTy}; FunctionProtoType::ExtProtoInfo EPI; EPI.Variadic = true; @@ -3596,7 +3628,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { std::make_pair(StringRef(), QualType()) // __context with shared vars }; ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - Params, /*OpenMPCaptureLevel=*/2); + Params, /*OpenMPCaptureLevel=*/1); // Mark this captured region as inlined, because we don't use outlined // function directly. 
getCurCapturedRegion()->TheCapturedDecl->addAttr( diff --git a/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp index ec40af192b27b..39c3184f2f894 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp @@ -206,7 +206,7 @@ int main() { // CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4 // CHECK: store [[S_DOUBLE_TY]]* %{{.+}}, [[S_DOUBLE_TY]]** [[VAR_REF]], // CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 5 -// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* %{{.+}}, +// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* @{{.+}}, // CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_REF]], // Allocate task. diff --git a/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp index 52cd2662e1421..e74030da788f7 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp @@ -187,7 +187,7 @@ int main() { // CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3 // CHECK: store [[S_DOUBLE_TY]]* [[VAR_ADDR:%.+]], [[S_DOUBLE_TY]]** [[VAR_REF]], // CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4 -// CHECK: store i{{[0-9]+}}* [[SIVAR:%.+]], i{{[0-9]+}}** [[SIVAR_REF]], +// CHECK: store i{{[0-9]+}}* [[SIVAR:@.+]], i{{[0-9]+}}** [[SIVAR_REF]], // Allocate task. 
// Returns struct kmp_task_t { diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp index 80897ff1fcfba..689d3bb966ccf 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp @@ -206,7 +206,7 @@ int main() { // CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4 // CHECK: store [[S_DOUBLE_TY]]* %{{.+}}, [[S_DOUBLE_TY]]** [[VAR_REF]], // CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 5 -// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* %{{.+}}, +// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* @{{.+}}, // CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_REF]], // Allocate task. diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp index 9014ce378281e..efb32f025215f 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp @@ -187,7 +187,7 @@ int main() { // CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3 // CHECK: store [[S_DOUBLE_TY]]* [[VAR_ADDR:%.+]], [[S_DOUBLE_TY]]** [[VAR_REF]], // CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4 -// CHECK: store i{{[0-9]+}}* [[SIVAR:%.+]], i{{[0-9]+}}** [[SIVAR_REF]], +// CHECK: store i{{[0-9]+}}* [[SIVAR:@.+]], i{{[0-9]+}}** [[SIVAR_REF]], // Allocate task. // Returns struct kmp_task_t {