diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 20a8ada60e0fe..ae99357eeea7f 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -597,6 +597,9 @@ class LangOptions : public LangOptionsBase { return !requiresStrictPrototypes() && !OpenCL; } + /// Returns true if the language supports calling the 'atexit' function. + bool hasAtExit() const { return !(OpenMP && OpenMPIsTargetDevice); } + /// Returns true if implicit int is part of the language requirements. bool isImplicitIntRequired() const { return !CPlusPlus && !C99; } diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index 3fa28b343663f..e08a1e5f42df2 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -327,6 +327,15 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD, registerGlobalDtorWithAtExit(dtorStub); } +/// Register a global destructor using the LLVM 'llvm.global_dtors' global. +void CodeGenFunction::registerGlobalDtorWithLLVM(const VarDecl &VD, + llvm::FunctionCallee Dtor, + llvm::Constant *Addr) { + // Create a function which calls the destructor. + llvm::Function *dtorStub = createAtExitStub(VD, Dtor, Addr); + CGM.AddGlobalDtor(dtorStub); +} + void CodeGenFunction::registerGlobalDtorWithAtExit(llvm::Constant *dtorStub) { // extern "C" int atexit(void (*f)(void)); assert(dtorStub->getType() == @@ -519,10 +528,6 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, D->hasAttr())) return; - if (getLangOpts().OpenMP && - getOpenMPRuntime().emitDeclareTargetVarDefinition(D, Addr, PerformInit)) - return; - // Check if we've already initialized this decl. auto I = DelayedCXXInitPosition.find(D); if (I != DelayedCXXInitPosition.end() && I->second == ~0U) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index a8e1150e44566..d2be8141a3a4b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1747,136 +1747,6 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( return nullptr; } -bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, - llvm::GlobalVariable *Addr, - bool PerformInit) { - if (CGM.getLangOpts().OMPTargetTriples.empty() && - !CGM.getLangOpts().OpenMPIsTargetDevice) - return false; - std::optional Res = - OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || - ((*Res == OMPDeclareTargetDeclAttr::MT_To || - *Res == OMPDeclareTargetDeclAttr::MT_Enter) && - HasRequiresUnifiedSharedMemory)) - return CGM.getLangOpts().OpenMPIsTargetDevice; - VD = VD->getDefinition(CGM.getContext()); - assert(VD && "Unknown VarDecl"); - - if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) - return CGM.getLangOpts().OpenMPIsTargetDevice; - - QualType ASTTy = VD->getType(); - SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); - - // Produce the unique prefix to identify the new target regions. We use - // the source location of the variable declaration which we know to not - // conflict with any target region. - llvm::TargetRegionEntryInfo EntryInfo = - getEntryInfoFromPresumedLoc(CGM, OMPBuilder, Loc, VD->getName()); - SmallString<128> Buffer, Out; - OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo); - - const Expr *Init = VD->getAnyInitializer(); - if (CGM.getLangOpts().CPlusPlus && PerformInit) { - llvm::Constant *Ctor; - llvm::Constant *ID; - if (CGM.getLangOpts().OpenMPIsTargetDevice) { - // Generate function that re-emits the declaration's initializer into - // the threadprivate copy of the variable VD - CodeGenFunction CtorCGF(CGM); - - const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); - llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( - FTy, Twine(Buffer, "_ctor"), FI, Loc, false, - llvm::GlobalValue::WeakODRLinkage); - Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility); - if (CGM.getTriple().isAMDGCN()) - Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); - auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); - CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, - FunctionArgList(), Loc, Loc); - auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); - llvm::Constant *AddrInAS0 = Addr; - if (Addr->getAddressSpace() != 0) - AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( - Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0)); - CtorCGF.EmitAnyExprToMem(Init, - Address(AddrInAS0, Addr->getValueType(), - CGM.getContext().getDeclAlign(VD)), - Init->getType().getQualifiers(), - /*IsInitializer=*/true); - CtorCGF.FinishFunction(); - Ctor = Fn; - ID = Fn; - } else { - Ctor = new llvm::GlobalVariable( - CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, - llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); - ID = Ctor; - } - - // Register the information for the entry associated with the constructor. - Out.clear(); - auto CtorEntryInfo = EntryInfo; - CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out); - OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo( - CtorEntryInfo, Ctor, ID, - llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor); - } - if (VD->getType().isDestructedType() != QualType::DK_none) { - llvm::Constant *Dtor; - llvm::Constant *ID; - if (CGM.getLangOpts().OpenMPIsTargetDevice) { - // Generate function that emits destructor call for the threadprivate - // copy of the variable VD - CodeGenFunction DtorCGF(CGM); - - const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); - llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( - FTy, Twine(Buffer, "_dtor"), FI, Loc, false, - llvm::GlobalValue::WeakODRLinkage); - Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility); - if (CGM.getTriple().isAMDGCN()) - Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); - auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); - DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, - FunctionArgList(), Loc, Loc); - // Create a scope with an artificial location for the body of this - // function. - auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); - llvm::Constant *AddrInAS0 = Addr; - if (Addr->getAddressSpace() != 0) - AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( - Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0)); - DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(), - CGM.getContext().getDeclAlign(VD)), - ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), - DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); - DtorCGF.FinishFunction(); - Dtor = Fn; - ID = Fn; - } else { - Dtor = new llvm::GlobalVariable( - CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, - llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); - ID = Dtor; - } - // Register the information for the entry associated with the destructor. - Out.clear(); - auto DtorEntryInfo = EntryInfo; - DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out); - OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo( - DtorEntryInfo, Dtor, ID, - llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor); - } - return CGM.getLangOpts().OpenMPIsTargetDevice; -} - void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV) { std::optional ActiveAttr = diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 0c4ad46e881b9..b01b39abd1606 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -1089,14 +1089,6 @@ class CGOpenMPRuntime { SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF = nullptr); - /// Emit a code for initialization of declare target variable. - /// \param VD Declare target variable. - /// \param Addr Address of the global variable \a VD. - /// \param PerformInit true if initialization expression is not constant. - virtual bool emitDeclareTargetVarDefinition(const VarDecl *VD, - llvm::GlobalVariable *Addr, - bool PerformInit); - /// Emit code for handling declare target functions in the runtime. /// \param FD Declare target function. /// \param Addr Address of the global \a FD. diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index bb8c14401032b..b4c634dcf5537 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4536,6 +4536,11 @@ class CodeGenFunction : public CodeGenTypeCache { void registerGlobalDtorWithAtExit(const VarDecl &D, llvm::FunctionCallee fn, llvm::Constant *addr); + /// Registers the dtor using 'llvm.global_dtors' for platforms that do not + /// support an 'atexit()' function. + void registerGlobalDtorWithLLVM(const VarDecl &D, llvm::FunctionCallee fn, + llvm::Constant *addr); + /// Call atexit() with function dtorStub. void registerGlobalDtorWithAtExit(llvm::Constant *dtorStub); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 793861f23b15f..e81edc979c208 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1570,6 +1570,13 @@ class CodeGenModule : public CodeGenTypeCache { const VarDecl *D, ForDefinition_t IsForDefinition = NotForDefinition); + // FIXME: Hardcoding priority here is gross. + void AddGlobalCtor(llvm::Function *Ctor, int Priority = 65535, + unsigned LexOrder = ~0U, + llvm::Constant *AssociatedData = nullptr); + void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535, + bool IsDtorAttrFunc = false); + private: llvm::Constant *GetOrCreateLLVMFunction( StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable, @@ -1641,13 +1648,6 @@ class CodeGenModule : public CodeGenTypeCache { void EmitPointerToInitFunc(const VarDecl *VD, llvm::GlobalVariable *Addr, llvm::Function *InitFunc, InitSegAttr *ISA); - // FIXME: Hardcoding priority here is gross. - void AddGlobalCtor(llvm::Function *Ctor, int Priority = 65535, - unsigned LexOrder = ~0U, - llvm::Constant *AssociatedData = nullptr); - void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535, - bool IsDtorAttrFunc = false); - /// EmitCtorList - Generates a global array of functions and priorities using /// the given list and name. This array will have appending linkage and is /// suitable for use as a LLVM constructor or destructor array. Clears Fns. diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 89a2127f3761a..7398f494c0206 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -2794,6 +2794,14 @@ void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, if (D.isNoDestroy(CGM.getContext())) return; + // OpenMP offloading supports C++ constructors and destructors but we do not + // always have 'atexit' available. Instead lower these to use the LLVM global + // destructors which we can handle directly in the runtime. Note that this is + // not strictly 1-to-1 with using `atexit` because we no longer tear down + // globals in reverse order of when they were constructed. + if (!CGM.getLangOpts().hasAtExit() && !D.isStaticLocal()) + return CGF.registerGlobalDtorWithLLVM(D, dtor, addr); + // emitGlobalDtorWithCXAAtExit will emit a call to either __cxa_thread_atexit // or __cxa_atexit depending on whether this VarDecl is a thread-local storage // or not. CXAAtExit controls only __cxa_atexit, so use it if it is enabled. diff --git a/clang/test/Headers/amdgcn_openmp_device_math_constexpr.cpp b/clang/test/Headers/amdgcn_openmp_device_math_constexpr.cpp index a5bb949ccaad3..0fdc02edc1508 100644 --- a/clang/test/Headers/amdgcn_openmp_device_math_constexpr.cpp +++ b/clang/test/Headers/amdgcn_openmp_device_math_constexpr.cpp @@ -35,7 +35,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f); #pragma omp end declare target -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_fabsf_f32_l14_ctor +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init // CHECK-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5) @@ -49,7 +49,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f); // CHECK-NEXT: ret void // // -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_fabs_f32_l15_ctor +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.1 // CHECK-SAME: () #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL_I_I:%.*]] = alloca float, align 4, addrspace(5) @@ -69,7 +69,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f); // CHECK-NEXT: ret void // // -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_sinf_f32_l17_ctor +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.2 // CHECK-SAME: () #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5) @@ -83,7 +83,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f); // CHECK-NEXT: ret void // // -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_sin_f32_l18_ctor +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.3 // CHECK-SAME: () #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL_I_I:%.*]] = alloca float, align 4, addrspace(5) @@ -103,7 +103,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f); // CHECK-NEXT: ret void // // -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_cosf_f32_l20_ctor +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.4 // CHECK-SAME: () #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5) @@ -117,7 +117,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f); // CHECK-NEXT: ret void // // -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_cos_f32_l21_ctor +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.5 // CHECK-SAME: () #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL_I_I:%.*]] = alloca float, align 4, addrspace(5) @@ -137,7 +137,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f); // CHECK-NEXT: ret void // // -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_fmaf_f32_l23_ctor +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.6 // CHECK-SAME: () #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5) @@ -159,7 +159,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f); // CHECK-NEXT: ret void // // -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_fma_f32_l24_ctor +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.7 // CHECK-SAME: () #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL_I_I:%.*]] = alloca float, align 4, addrspace(5) @@ -195,7 +195,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f); // CHECK-NEXT: ret void // // -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_min_f32_l27_ctor +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.8 // CHECK-SAME: () #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5) @@ -213,7 +213,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f); // CHECK-NEXT: ret void // // -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_max_f32_l28_ctor +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.9 // CHECK-SAME: () #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5) @@ -231,7 +231,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f); // CHECK-NEXT: ret void // // -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_fmin_f32_l30_ctor +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.10 // CHECK-SAME: () #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CALL:%.*]] = call noundef float @_Z4fminff(float noundef 2.000000e+00, float noundef -4.000000e+00) #[[ATTR4:[0-9]+]] @@ -239,7 +239,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f); // CHECK-NEXT: ret void // // -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_fmax_f32_l31_ctor +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.11 // CHECK-SAME: () #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[CALL:%.*]] = call noundef float @_Z4fmaxff(float noundef 2.000000e+00, float noundef -4.000000e+00) #[[ATTR4]] @@ -247,7 +247,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f); // CHECK-NEXT: ret void // // -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_fminf_f32_l33_ctor +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.12 // CHECK-SAME: () #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5) @@ -265,7 +265,7 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f); // CHECK-NEXT: ret void // // -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_constexpr_fmaxf_f32_l34_ctor +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.13 // CHECK-SAME: () #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5) @@ -282,3 +282,23 @@ const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f); // CHECK-NEXT: store float [[TMP2]], ptr addrspacecast (ptr addrspace(1) @_ZL19constexpr_fmaxf_f32 to ptr), align 4 // CHECK-NEXT: ret void // +// +// CHECK-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_amdgcn_openmp_device_math_constexpr.cpp +// CHECK-SAME: () #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @__cxx_global_var_init() +// CHECK-NEXT: call void @__cxx_global_var_init.1() +// CHECK-NEXT: call void @__cxx_global_var_init.2() +// CHECK-NEXT: call void @__cxx_global_var_init.3() +// CHECK-NEXT: call void @__cxx_global_var_init.4() +// CHECK-NEXT: call void @__cxx_global_var_init.5() +// CHECK-NEXT: call void @__cxx_global_var_init.6() +// CHECK-NEXT: call void @__cxx_global_var_init.7() +// CHECK-NEXT: call void @__cxx_global_var_init.8() +// CHECK-NEXT: call void @__cxx_global_var_init.9() +// CHECK-NEXT: call void @__cxx_global_var_init.10() +// CHECK-NEXT: call void @__cxx_global_var_init.11() +// CHECK-NEXT: call void @__cxx_global_var_init.12() +// CHECK-NEXT: call void @__cxx_global_var_init.13() +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp index ff5a3ba2b95d2..80a5067b357a2 100644 --- a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp +++ b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-globals --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc @@ -20,7 +20,11 @@ S A; #pragma omp end declare target #endif -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_A_l19_ctor +//. +// CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_amdgcn_target_global_constructor.cpp, ptr null }] +// CHECK: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__dtor_A, ptr null }] +//. +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init // CHECK-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: call void @_ZN1SC1Ev(ptr noundef nonnull align 4 dereferenceable(4) addrspacecast (ptr addrspace(1) @A to ptr)) #[[ATTR3:[0-9]+]] @@ -38,13 +42,6 @@ S A; // CHECK-NEXT: ret void // // -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_A_l19_dtor -// CHECK-SAME: () #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: call void @_ZN1SD1Ev(ptr noundef nonnull align 4 dereferenceable(4) addrspacecast (ptr addrspace(1) @A to ptr)) #[[ATTR4:[0-9]+]] -// CHECK-NEXT: ret void -// -// // CHECK-LABEL: define {{[^@]+}}@_ZN1SD1Ev // CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK-NEXT: entry: @@ -52,7 +49,14 @@ S A; // CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr // CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 -// CHECK-NEXT: call void @_ZN1SD2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK-NEXT: call void @_ZN1SD2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__dtor_A +// CHECK-SAME: () #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @_ZN1SD1Ev(ptr addrspacecast (ptr addrspace(1) @A to ptr)) // CHECK-NEXT: ret void // // @@ -78,3 +82,9 @@ S A; // CHECK-NEXT: call void @_Z3foov() #[[ATTR3]] // CHECK-NEXT: ret void // +// +// CHECK-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_amdgcn_target_global_constructor.cpp +// CHECK-SAME: () #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @__cxx_global_var_init() +// CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/declare_target_codegen.cpp b/clang/test/OpenMP/declare_target_codegen.cpp index b4f6e43374541..a5a9b790b4689 100644 --- a/clang/test/OpenMP/declare_target_codegen.cpp +++ b/clang/test/OpenMP/declare_target_codegen.cpp @@ -52,7 +52,6 @@ // CHECK-DAG: define {{.*}}i32 @{{.*}}{{foo|bar|baz2|baz3|FA|f_method}}{{.*}}() // CHECK-DAG: define {{.*}}void @{{.*}}TemplateClass{{.*}}(ptr {{[^,]*}} %{{.*}}) // CHECK-DAG: define {{.*}}i32 @{{.*}}TemplateClass{{.*}}f_method{{.*}}(ptr {{[^,]*}} %{{.*}}) -// CHECK-DAG: define {{.*}}void @__omp_offloading_{{.*}}_globals_l[[@LINE+89]]_ctor() #ifndef HEADER #define HEADER diff --git a/clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp b/clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp index a612ec10f34c4..1d9ef0c398166 100644 --- a/clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp +++ b/clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp @@ -20,6 +20,9 @@ // HOST-DAG: @[[C_ADDR:.+]] = internal global i32 0, // HOST-DAG: @[[CD_ADDR:.+]] ={{( protected | dso_local)?}} global %struct.S zeroinitializer, +// DEVICE-DAG: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @[[CTOR:.+]], ptr null }] +// DEVICE-DAG: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @[[DTOR:.+]], ptr null }] + #pragma omp declare target int foo() { return 0; } #pragma omp end declare target @@ -43,12 +46,6 @@ int caz() { return 0; } static int c = foo() + bar() + baz(); #pragma omp declare target (c) -// HOST-DAG: @[[C_CTOR:__omp_offloading_.+_c_l44_ctor]] = private constant i8 0 -// DEVICE-DAG: define weak_odr protected void [[C_CTOR:@__omp_offloading_.+_c_l44_ctor]]() -// DEVICE-DAG: call noundef i32 [[FOO]]() -// DEVICE-DAG: call noundef i32 [[BAR]]() -// DEVICE-DAG: call noundef i32 [[BAZ]]() -// DEVICE-DAG: ret void struct S { int a; @@ -60,26 +57,7 @@ struct S { #pragma omp declare target S cd = doo() + car() + caz() + baz(); #pragma omp end declare target -// HOST-DAG: @[[CD_CTOR:__omp_offloading_.+_cd_l61_ctor]] = private constant i8 0 -// DEVICE-DAG: define weak_odr protected void [[CD_CTOR:@__omp_offloading_.+_cd_l61_ctor]]() -// DEVICE-DAG: call noundef i32 [[DOO]]() -// DEVICE-DAG: call noundef i32 [[CAR]]() -// DEVICE-DAG: call noundef i32 [[CAZ]]() -// DEVICE-DAG: ret void - -// HOST-DAG: @[[CD_DTOR:__omp_offloading_.+_cd_l61_dtor]] = private constant i8 0 -// DEVICE-DAG: define weak_odr protected void [[CD_DTOR:@__omp_offloading_.+_cd_l61_dtor]]() -// DEVICE-DAG: call void -// DEVICE-DAG: ret void - -// HOST-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [{{[0-9]+}} x i8] c"[[CD_ADDR]]\00" -// HOST-DAG: @.omp_offloading.entry.[[CD_ADDR]] = weak{{.*}} constant %struct.__tgt_offload_entry { ptr @[[CD_ADDR]], ptr @.omp_offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1 -// HOST-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [{{[0-9]+}} x i8] c"[[C_CTOR]]\00" -// HOST-DAG: @.omp_offloading.entry.[[C_CTOR]] = weak{{.*}} constant %struct.__tgt_offload_entry { ptr @[[C_CTOR]], ptr @.omp_offloading.entry_name{{.*}}, i64 0, i32 2, i32 0 }, section "omp_offloading_entries", align 1 -// HOST-DAG: @.omp_offloading.entry_name{{.*}}= internal unnamed_addr constant [{{[0-9]+}} x i8] c"[[CD_CTOR]]\00" -// HOST-DAG: @.omp_offloading.entry.[[CD_CTOR]] = weak{{.*}} constant %struct.__tgt_offload_entry { ptr @[[CD_CTOR]], ptr @.omp_offloading.entry_name{{.*}}, i64 0, i32 2, i32 0 }, section "omp_offloading_entries", align 1 -// HOST-DAG: @.omp_offloading.entry_name{{.*}}= internal unnamed_addr constant [{{[0-9]+}} x i8] c"[[CD_DTOR]]\00" -// HOST-DAG: @.omp_offloading.entry.[[CD_DTOR]] = weak{{.*}} constant %struct.__tgt_offload_entry { ptr @[[CD_DTOR]], ptr @.omp_offloading.entry_name{{.*}}, i64 0, i32 4, i32 0 }, section "omp_offloading_entries", align 1 + int maini1() { int a; #pragma omp target map(tofrom : a) @@ -100,10 +78,5 @@ int maini1() { // HOST-DAG: !{i32 1, !"[[CD_ADDR]]", i32 0, i32 {{[0-9]+}}} // HOST-DAG: !{i32 1, !"[[C_ADDR]]", i32 0, i32 {{[0-9]+}}} -// DEVICE: !nvvm.annotations -// DEVICE-DAG: !{ptr [[C_CTOR]], !"kernel", i32 1} -// DEVICE-DAG: !{ptr [[CD_CTOR]], !"kernel", i32 1} -// DEVICE-DAG: !{ptr [[CD_DTOR]], !"kernel", i32 1} - #endif // HEADER diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 7fdd7bf868b8b..334eaf01a59c9 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -265,10 +265,6 @@ class OffloadEntriesInfoManager { enum OMPTargetRegionEntryKind : uint32_t { /// Mark the entry as target region. OMPTargetRegionEntryTargetRegion = 0x0, - /// Mark the entry as a global constructor. - OMPTargetRegionEntryCtor = 0x02, - /// Mark the entry as a global destructor. - OMPTargetRegionEntryDtor = 0x04, }; /// Target region entries info. diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 1ee0b09f98712..5ced4129ee802 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -789,14 +789,17 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) { if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30)) report_fatal_error(".alias requires PTX version >= 6.3 and sm_30"); + // OpenMP supports NVPTX global constructors and destructors. + bool IsOpenMP = M.getModuleFlag("openmp") != nullptr; + if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")) && - !LowerCtorDtor) { + !LowerCtorDtor && !IsOpenMP) { report_fatal_error( "Module has a nontrivial global ctor, which NVPTX does not support."); return true; // error } if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")) && - !LowerCtorDtor) { + !LowerCtorDtor && !IsOpenMP) { report_fatal_error( "Module has a nontrivial global dtor, which NVPTX does not support."); return true; // error diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp index 399a71390a65a..a529c379844e9 100644 --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -1914,6 +1914,16 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { return Plugin::success(); } + virtual Error callGlobalConstructors(GenericPluginTy &Plugin, + DeviceImageTy &Image) override { + return callGlobalCtorDtorCommon(Plugin, Image, "amdgcn.device.init"); + } + + virtual Error callGlobalDestructors(GenericPluginTy &Plugin, + DeviceImageTy &Image) override { + return callGlobalCtorDtorCommon(Plugin, Image, "amdgcn.device.fini"); + } + const uint64_t getStreamBusyWaitMicroseconds() const { return OMPX_StreamBusyWait; } @@ -2627,6 +2637,38 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { using AMDGPUEventRef = AMDGPUResourceRef; using AMDGPUEventManagerTy = GenericDeviceResourceManagerTy; + /// Common method to invoke a single threaded constructor or destructor + /// kernel by name. + Error callGlobalCtorDtorCommon(GenericPluginTy &Plugin, DeviceImageTy &Image, + const char *Name) { + // Perform a quick check for the named kernel in the image. The kernel + // should be created by the 'amdgpu-lower-ctor-dtor' pass. + GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler(); + GlobalTy Global(Name, sizeof(void *)); + if (auto Err = Handler.getGlobalMetadataFromImage(*this, Image, Global)) { + consumeError(std::move(Err)); + return Plugin::success(); + } + + // Allocate and construct the AMDGPU kernel. + AMDGPUKernelTy AMDGPUKernel(Name); + if (auto Err = AMDGPUKernel.init(*this, Image)) + return std::move(Err); + + AsyncInfoWrapperTy AsyncInfoWrapper(*this, nullptr); + + KernelArgsTy KernelArgs = {}; + if (auto Err = AMDGPUKernel.launchImpl(*this, /*NumThread=*/1u, + /*NumBlocks=*/1ul, KernelArgs, + /*Args=*/nullptr, AsyncInfoWrapper)) + return std::move(Err); + + Error Err = Plugin::success(); + AsyncInfoWrapper.finalize(Err); + + return std::move(Err); + } + /// Envar for controlling the number of HSA queues per device. High number of /// queues may degrade performance. UInt32Envar OMPX_NumQueues; diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/GlobalHandler.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/GlobalHandler.h index b51ed3c09759c..c92e0d549c1bd 100644 --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/GlobalHandler.h +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/GlobalHandler.h @@ -91,11 +91,6 @@ class GenericGlobalHandlerTy { /// Map to store the ELF object files that have been loaded. llvm::DenseMap ELFObjectFiles; - /// Get the cached ELF64LEObjectFile previosuly created for a specific - /// device image or create it if did not exist. - const ELF64LEObjectFile * - getOrCreateELFObjectFile(const GenericDeviceTy &Device, DeviceImageTy &Image); - /// Extract the global's information from the ELF image, section, and symbol. virtual Error getGlobalMetadataFromELF(const DeviceImageTy &Image, const ELF64LE::Sym &Symbol, @@ -119,6 +114,11 @@ class GenericGlobalHandlerTy { public: virtual ~GenericGlobalHandlerTy() {} + /// Get the cached ELF64LEObjectFile previosuly created for a specific + /// device image or create it if did not exist. + const ELF64LEObjectFile * + getOrCreateELFObjectFile(const GenericDeviceTy &Device, DeviceImageTy &Image); + /// Get the address and size of a global in the image. Address and size are /// return in \p ImageGlobal, the global name is passed in \p ImageGlobal. Error getGlobalMetadataFromImage(GenericDeviceTy &Device, diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp index b55509c2f28ff..3f798a908e736 100644 --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp @@ -717,6 +717,9 @@ Error GenericDeviceTy::init(GenericPluginTy &Plugin) { } Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { + for (DeviceImageTy *Image : LoadedImages) + if (auto Err = callGlobalDestructors(Plugin, *Image)) + return std::move(Err); if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) { GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler(); @@ -839,6 +842,10 @@ GenericDeviceTy::loadBinary(GenericPluginTy &Plugin, } #endif + // Call any global constructors present on the device. + if (auto Err = callGlobalConstructors(Plugin, *Image)) + return std::move(Err); + // Return the pointer to the table of entries. return Image->getOffloadEntryTable(); } diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h index d1294405c04b3..f09ae24163dfc 100644 --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h @@ -671,6 +671,20 @@ struct GenericDeviceTy : public DeviceAllocatorTy { Error synchronize(__tgt_async_info *AsyncInfo); virtual Error synchronizeImpl(__tgt_async_info &AsyncInfo) = 0; + /// Invokes any global constructors on the device if present and is required + /// by the target. + virtual Error callGlobalConstructors(GenericPluginTy &Plugin, + DeviceImageTy &Image) { + return Error::success(); + } + + /// Invokes any global destructors on the device if present and is required + /// by the target. + virtual Error callGlobalDestructors(GenericPluginTy &Plugin, + DeviceImageTy &Image) { + return Error::success(); + } + /// Query for the completion of the pending operations on the __tgt_async_info /// structure in a non-blocking manner. Error queryAsync(__tgt_async_info *AsyncInfo); diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp index 84c0fd7f724ee..a6e28574a7f08 100644 --- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp @@ -377,6 +377,16 @@ struct CUDADeviceTy : public GenericDeviceTy { return Plugin::success(); } + virtual Error callGlobalConstructors(GenericPluginTy &Plugin, + DeviceImageTy &Image) override { + return callGlobalCtorDtorCommon(Plugin, Image, /*IsCtor=*/true); + } + + virtual Error callGlobalDestructors(GenericPluginTy &Plugin, + DeviceImageTy &Image) override { + return callGlobalCtorDtorCommon(Plugin, Image, /*IsCtor=*/false); + } + /// Allocate and construct a CUDA kernel. Expected constructKernel(const __tgt_offload_entry &KernelEntry) override { @@ -1038,6 +1048,106 @@ struct CUDADeviceTy : public GenericDeviceTy { using CUDAStreamManagerTy = GenericDeviceResourceManagerTy; using CUDAEventManagerTy = GenericDeviceResourceManagerTy; + Error callGlobalCtorDtorCommon(GenericPluginTy &Plugin, DeviceImageTy &Image, + bool IsCtor) { + const char *KernelName = IsCtor ? "nvptx$device$init" : "nvptx$device$fini"; + // Perform a quick check for the named kernel in the image. The kernel + // should be created by the 'nvptx-lower-ctor-dtor' pass. + GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler(); + GlobalTy Global(KernelName, sizeof(void *)); + if (auto Err = Handler.getGlobalMetadataFromImage(*this, Image, Global)) { + consumeError(std::move(Err)); + return Plugin::success(); + } + + // The Nvidia backend cannot handle creating the ctor / dtor array + // automatically so we must create it ourselves. The backend will emit + // several globals that contain function pointers we can call. These are + // prefixed with a known name due to Nvidia's lack of section support. + const ELF64LEObjectFile *ELFObj = + Handler.getOrCreateELFObjectFile(*this, Image); + if (!ELFObj) + return Plugin::error("Unable to create ELF object for image %p", + Image.getStart()); + + // Search for all symbols that contain a constructor or destructor. + SmallVector> Funcs; + for (ELFSymbolRef Sym : ELFObj->symbols()) { + auto NameOrErr = Sym.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + + if (!NameOrErr->starts_with(IsCtor ? "__init_array_object_" + : "__fini_array_object_")) + continue; + + uint16_t Priority; + if (NameOrErr->rsplit('_').second.getAsInteger(10, Priority)) + return Plugin::error("Invalid priority for constructor or destructor"); + + Funcs.emplace_back(*NameOrErr, Priority); + } + + // Sort the created array to be in priority order. + llvm::sort(Funcs, [=](auto x, auto y) { return x.second < y.second; }); + + // Allocate a buffer to store all of the known constructor / destructor + // functions in so we can iterate them on the device. + void *Buffer = + allocate(Funcs.size() * sizeof(void *), nullptr, TARGET_ALLOC_DEVICE); + if (!Buffer) + return Plugin::error("Failed to allocate memory for global buffer"); + + auto *GlobalPtrStart = reinterpret_cast(Buffer); + auto *GlobalPtrStop = reinterpret_cast(Buffer) + Funcs.size(); + + SmallVector FunctionPtrs(Funcs.size()); + std::size_t Idx = 0; + for (auto [Name, Priority] : Funcs) { + GlobalTy FunctionAddr(Name.str(), sizeof(void *), &FunctionPtrs[Idx++]); + if (auto Err = Handler.readGlobalFromDevice(*this, Image, FunctionAddr)) + return std::move(Err); + } + + // Copy the local buffer to the device. + if (auto Err = dataSubmit(GlobalPtrStart, FunctionPtrs.data(), + FunctionPtrs.size() * sizeof(void *), nullptr)) + return std::move(Err); + + // Copy the created buffer to the appropriate symbols so the kernel can + // iterate through them. + GlobalTy StartGlobal(IsCtor ? "__init_array_start" : "__fini_array_start", + sizeof(void *), &GlobalPtrStart); + if (auto Err = Handler.writeGlobalToDevice(*this, Image, StartGlobal)) + return std::move(Err); + + GlobalTy StopGlobal(IsCtor ? "__init_array_end" : "__fini_array_end", + sizeof(void *), &GlobalPtrStop); + if (auto Err = Handler.writeGlobalToDevice(*this, Image, StopGlobal)) + return std::move(Err); + + CUDAKernelTy CUDAKernel(KernelName); + + if (auto Err = CUDAKernel.init(*this, Image)) + return std::move(Err); + + AsyncInfoWrapperTy AsyncInfoWrapper(*this, nullptr); + + KernelArgsTy KernelArgs = {}; + if (auto Err = CUDAKernel.launchImpl(*this, /*NumThread=*/1u, + /*NumBlocks=*/1ul, KernelArgs, nullptr, + AsyncInfoWrapper)) + return std::move(Err); + + Error Err = Plugin::success(); + AsyncInfoWrapper.finalize(Err); + + if (free(Buffer, TARGET_ALLOC_DEVICE) != OFFLOAD_SUCCESS) + return Plugin::error("Failed to free memory for global buffer"); + + return std::move(Err); + } + /// Stream manager for CUDA streams. CUDAStreamManagerTy CUDAStreamManager; diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp index fdedf2ee456ac..d74592035bf9e 100644 --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -313,12 +313,18 @@ static void registerGlobalCtorsDtorsForImage(__tgt_bin_desc *Desc, DP("Adding ctor " DPxMOD " to the pending list.\n", DPxPTR(Entry->addr)); Device.PendingCtorsDtors[Desc].PendingCtors.push_back(Entry->addr); + MESSAGE("WARNING: Calling deprecated constructor for entry %s will be " + "removed in a future release \n", + Entry->name); } else if (Entry->flags & OMP_DECLARE_TARGET_DTOR) { // Dtors are pushed in reverse order so they are executed from end // to beginning when unregistering the library! DP("Adding dtor " DPxMOD " to the pending list.\n", DPxPTR(Entry->addr)); Device.PendingCtorsDtors[Desc].PendingDtors.push_front(Entry->addr); + MESSAGE("WARNING: Calling deprecated destructor for entry %s will be " + "removed in a future release \n", + Entry->name); } if (Entry->flags & OMP_DECLARE_TARGET_LINK) { @@ -544,7 +550,8 @@ void RTLsTy::unregisterLib(__tgt_bin_desc *Desc) { if (Device.PendingCtorsDtors[Desc].PendingCtors.empty()) { AsyncInfoTy AsyncInfo(Device); for (auto &Dtor : Device.PendingCtorsDtors[Desc].PendingDtors) { - int Rc = target(nullptr, Device, Dtor, CTorDTorKernelArgs, AsyncInfo); + int Rc = + target(nullptr, Device, Dtor, CTorDTorKernelArgs, AsyncInfo); if (Rc != OFFLOAD_SUCCESS) { DP("Running destructor " DPxMOD " failed.\n", DPxPTR(Dtor)); } diff --git a/openmp/libomptarget/test/libc/global_ctor_dtor.cpp b/openmp/libomptarget/test/libc/global_ctor_dtor.cpp new file mode 100644 index 0000000000000..a2d2a4833c369 --- /dev/null +++ b/openmp/libomptarget/test/libc/global_ctor_dtor.cpp @@ -0,0 +1,37 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +// REQUIRES: libc + +#include + +#pragma omp begin declare target device_type(nohost) + +// CHECK: void ctor1() +// CHECK: void ctor2() +// CHECK: void ctor3() +[[gnu::constructor(101)]] void ctor1() { puts(__PRETTY_FUNCTION__); } +[[gnu::constructor(102)]] void ctor2() { puts(__PRETTY_FUNCTION__); } +[[gnu::constructor(103)]] void ctor3() { puts(__PRETTY_FUNCTION__); } + +struct S { + S() { puts(__PRETTY_FUNCTION__); } + ~S() { puts(__PRETTY_FUNCTION__); } +}; + +// CHECK: S::S() +// CHECK: S::~S() +S s; + +// CHECK: void dtor3() +// CHECK: void dtor2() +// CHECK: void dtor1() +[[gnu::destructor(101)]] void dtor1() { puts(__PRETTY_FUNCTION__); } +[[gnu::destructor(103)]] void dtor3() { puts(__PRETTY_FUNCTION__); } +[[gnu::destructor(102)]] void dtor2() { puts(__PRETTY_FUNCTION__); } + +#pragma omp end declare target + +int main() { +#pragma omp target + ; +}