diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index 8b1cd83af2396..e08efed1cefc0 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -212,7 +212,11 @@ void CodeGenFunction::EmitVarDecl(const VarDecl &D) { // Static sampler variables translated to function calls. if (D.getType()->isSamplerT()) return; - + if (D.hasAttr()) { + llvm::GlobalValue::LinkageTypes Linkage = + CGM.getLLVMLinkageVarDefinition(&D); + return EmitStaticVarDecl(D, Linkage); + } llvm::GlobalValue::LinkageTypes Linkage = CGM.getLLVMLinkageVarDefinition(&D); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 3eeb1718e455a..57f9734604362 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -5354,6 +5354,11 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, // Lookup the entry, lazily creating it if necessary. llvm::GlobalValue *Entry = GetGlobalValue(MangledName); unsigned TargetAS = getContext().getTargetAddressSpace(AddrSpace); + if (D && D->hasAttr() && getLangOpts().OpenMP && + getTarget().getTriple().isGPU()) { + Entry->setLinkage(llvm::GlobalValue::InternalLinkage); + AddrSpace = LangAS::cuda_shared; + } if (Entry) { if (WeakRefReferences.erase(Entry)) { if (D && !D->hasAttr()) @@ -5734,6 +5739,9 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) { LangAS AS; if (OpenMPRuntime->hasAllocateAttributeForGlobalVar(D, AS)) return AS; + if (D && D->hasAttr()) { + return LangAS::cuda_shared; // maps to target addressspace 3 on NVPTX/AMD + } } return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D); } @@ -7623,6 +7631,9 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { EmitOMPThreadPrivateDecl(cast(D)); break; + case Decl::OMPGroupPrivate: + break; + case Decl::OMPAllocate: EmitOMPAllocateDecl(cast(D)); break; diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 81c591a00cfc6..1725e24f4ad06 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -3275,7 +3275,7 @@ SemaOpenMP::ActOnOpenMPGroupPrivateDirective(SourceLocation Loc, ArrayRef VarList) { if (!getLangOpts().OpenMP || getLangOpts().OpenMP < 60) { Diag(Loc, diag::err_omp_unexpected_directive) - << getOpenMPDirectiveName(OMPD_groupprivate, getLangOpts().OpenMP); + << 1 << getOpenMPDirectiveName(OMPD_groupprivate, getLangOpts().OpenMP); return nullptr; } if (OMPGroupPrivateDecl *D = CheckOMPGroupPrivateDecl(Loc, VarList)) { diff --git a/clang/test/OpenMP/groupprivate_codegen.cpp b/clang/test/OpenMP/groupprivate_codegen.cpp new file mode 100644 index 0000000000000..1a29fae6eb6d6 --- /dev/null +++ b/clang/test/OpenMP/groupprivate_codegen.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp-is-device -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-is-device -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK +// +// This test ensures that variables marked 'groupprivate' are emitted as +// device globals in the GPU shared address space (addrspace(3)). +// The test is GPU-only and checks the LLVM IR for addrspace(3). +// + +int group_var; + +#pragma omp groupprivate(group_var) + +void foo() { +#pragma omp target teams num_teams(4) thread_limit(100) +{ + // simple use so the var is referenced in device codegen + group_var = group_var + 1; +} +} + +// CHECK: @group_var = global i32 0, align 4, addrspace(3) +// CHECK: store i32 %{{.*}}, i32 addrspace(3)* @group_var, align 4 + +// CHECK: @group_var = global i32 0, align 4, addrspace(3) +// CHECK: store i32 %{{.*}}, i32 addrspace(3)* @group_var, align 4