Skip to content

Commit

Permalink
[OpenCL] Add global_device and global_host address spaces
Browse files Browse the repository at this point in the history
This patch introduces two new address spaces in OpenCL, global_device and global_host,
which are subsets of the global address space, so the address space hierarchy
looks like this:

```
generic->global->host
               ->device
       ->private
       ->local
constant
```

Justification: USM allocations may be associated with both host and device memory. We
want to give users a way to tell the compiler the allocation type of a USM pointer for
optimization purposes. (Link to the Unified Shared Memory extension:
https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc)

Before this patch, a USM pointer could only be in the opencl_global
address space, so a device backend couldn't tell whether a particular pointer
points to host or device memory. On FPGAs at least we can generate more
efficient hardware code if the user tells us where the pointer can point -
being able to distinguish between these types of pointers at compile time
allows us to instantiate simpler load-store units to perform memory
transactions.

Patch by Dmitry Sidorov.

Reviewed By: Anastasia

Differential Revision: https://reviews.llvm.org/D82174
  • Loading branch information
bader committed Jul 29, 2020
1 parent 2c662f3 commit 8d27be8
Show file tree
Hide file tree
Showing 22 changed files with 262 additions and 29 deletions.
5 changes: 5 additions & 0 deletions clang/include/clang/AST/Type.h
Expand Up @@ -480,6 +480,11 @@ class Qualifiers {
// Otherwise in OpenCLC v2.0 s6.5.5: every address space except
// for __constant can be used as __generic.
(A == LangAS::opencl_generic && B != LangAS::opencl_constant) ||
// We also define global_device and global_host address spaces,
// to distinguish global pointers allocated on host from pointers
// allocated on device, which are a subset of __global.
(A == LangAS::opencl_global && (B == LangAS::opencl_global_device ||
B == LangAS::opencl_global_host)) ||
// Consider pointer size address spaces to be equivalent to default.
((isPtrSizeAddressSpace(A) || A == LangAS::Default) &&
(isPtrSizeAddressSpace(B) || B == LangAS::Default));
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/AddressSpaces.h
Expand Up @@ -36,6 +36,8 @@ enum class LangAS : unsigned {
opencl_constant,
opencl_private,
opencl_generic,
opencl_global_device,
opencl_global_host,

// CUDA specific address spaces.
cuda_device,
Expand Down
10 changes: 10 additions & 0 deletions clang/include/clang/Basic/Attr.td
Expand Up @@ -1178,6 +1178,16 @@ def OpenCLGlobalAddressSpace : TypeAttr {
let Documentation = [OpenCLAddressSpaceGlobalDocs];
}

// Type attribute that places a type in the OpenCL global_device address space.
// Only the Clang attribute spelling is provided (usable as
// __attribute__((opencl_global_device)) or [[clang::opencl_global_device]]);
// unlike OpenCLLocalAddressSpace, no keyword spelling is declared here.
// Documentation is shared with the global_host attribute.
def OpenCLGlobalDeviceAddressSpace : TypeAttr {
let Spellings = [Clang<"opencl_global_device">];
let Documentation = [OpenCLAddressSpaceGlobalExtDocs];
}

// Type attribute that places a type in the OpenCL global_host address space.
// Only the Clang attribute spelling is provided (usable as
// __attribute__((opencl_global_host)) or [[clang::opencl_global_host]]);
// unlike OpenCLLocalAddressSpace, no keyword spelling is declared here.
// Documentation is shared with the global_device attribute.
def OpenCLGlobalHostAddressSpace : TypeAttr {
let Spellings = [Clang<"opencl_global_host">];
let Documentation = [OpenCLAddressSpaceGlobalExtDocs];
}

def OpenCLLocalAddressSpace : TypeAttr {
let Spellings = [Keyword<"__local">, Keyword<"local">, Clang<"opencl_local">];
let Documentation = [OpenCLAddressSpaceLocalDocs];
Expand Down
24 changes: 24 additions & 0 deletions clang/include/clang/Basic/AttrDocs.td
Expand Up @@ -3123,6 +3123,30 @@ scope) variables and static local variable as well.
}];
}

def OpenCLAddressSpaceGlobalExtDocs : Documentation {
let Category = DocOpenCLAddressSpaces;
let Heading = "[[clang::opencl_global_device]], [[clang::opencl_global_host]]";
let Content = [{
The ``global_device`` and ``global_host`` address space attributes specify that
an object is allocated in global memory on the device/host. It helps to
distinguish USM (Unified Shared Memory) pointers that access global device
memory from those that access global host memory. These new address spaces are
a subset of the ``__global/opencl_global`` address space. The full address space
set model for OpenCL 2.0 with the extension looks as follows::

  generic->global->host
                 ->device
         ->private
         ->local
  constant

As ``global_device`` and ``global_host`` are a subset of
``__global/opencl_global`` address spaces it is allowed to convert
``global_device`` and ``global_host`` address spaces to
``__global/opencl_global`` address spaces (following ISO/IEC TR 18037 5.1.3
"Address space nesting and rules for pointers").
}];
}

def OpenCLAddressSpaceLocalDocs : Documentation {
let Category = DocOpenCLAddressSpaces;
let Heading = "__local, local, [[clang::opencl_local]]";
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Sema/ParsedAttr.h
Expand Up @@ -606,6 +606,10 @@ class ParsedAttr final
return LangAS::opencl_constant;
case ParsedAttr::AT_OpenCLGlobalAddressSpace:
return LangAS::opencl_global;
case ParsedAttr::AT_OpenCLGlobalDeviceAddressSpace:
return LangAS::opencl_global_device;
case ParsedAttr::AT_OpenCLGlobalHostAddressSpace:
return LangAS::opencl_global_host;
case ParsedAttr::AT_OpenCLLocalAddressSpace:
return LangAS::opencl_local;
case ParsedAttr::AT_OpenCLPrivateAddressSpace:
Expand Down
26 changes: 14 additions & 12 deletions clang/lib/AST/ASTContext.cpp
Expand Up @@ -919,18 +919,20 @@ static const LangASMap *getAddressSpaceMap(const TargetInfo &T,
// The fake address space map must have a distinct entry for each
// language-specific address space.
static const unsigned FakeAddrSpaceMap[] = {
0, // Default
1, // opencl_global
3, // opencl_local
2, // opencl_constant
0, // opencl_private
4, // opencl_generic
5, // cuda_device
6, // cuda_constant
7, // cuda_shared
8, // ptr32_sptr
9, // ptr32_uptr
10 // ptr64
0, // Default
1, // opencl_global
3, // opencl_local
2, // opencl_constant
0, // opencl_private
4, // opencl_generic
5, // opencl_global_device
6, // opencl_global_host
7, // cuda_device
8, // cuda_constant
9, // cuda_shared
10, // ptr32_sptr
11, // ptr32_uptr
12 // ptr64
};
return &FakeAddrSpaceMap;
} else {
Expand Down
41 changes: 32 additions & 9 deletions clang/lib/AST/ItaniumMangle.cpp
Expand Up @@ -2388,16 +2388,39 @@ void CXXNameMangler::mangleQualifiers(Qualifiers Quals, const DependentAddressSp
switch (AS) {
default: llvm_unreachable("Not a language specific address space");
// <OpenCL-addrspace> ::= "CL" [ "global" | "local" | "constant" |
// "private"| "generic" ]
case LangAS::opencl_global: ASString = "CLglobal"; break;
case LangAS::opencl_local: ASString = "CLlocal"; break;
case LangAS::opencl_constant: ASString = "CLconstant"; break;
case LangAS::opencl_private: ASString = "CLprivate"; break;
case LangAS::opencl_generic: ASString = "CLgeneric"; break;
// "private"| "generic" | "device" |
// "host" ]
case LangAS::opencl_global:
ASString = "CLglobal";
break;
case LangAS::opencl_global_device:
ASString = "CLdevice";
break;
case LangAS::opencl_global_host:
ASString = "CLhost";
break;
case LangAS::opencl_local:
ASString = "CLlocal";
break;
case LangAS::opencl_constant:
ASString = "CLconstant";
break;
case LangAS::opencl_private:
ASString = "CLprivate";
break;
case LangAS::opencl_generic:
ASString = "CLgeneric";
break;
// <CUDA-addrspace> ::= "CU" [ "device" | "constant" | "shared" ]
case LangAS::cuda_device: ASString = "CUdevice"; break;
case LangAS::cuda_constant: ASString = "CUconstant"; break;
case LangAS::cuda_shared: ASString = "CUshared"; break;
case LangAS::cuda_device:
ASString = "CUdevice";
break;
case LangAS::cuda_constant:
ASString = "CUconstant";
break;
case LangAS::cuda_shared:
ASString = "CUshared";
break;
// <ptrsize-addrspace> ::= [ "ptr32_sptr" | "ptr32_uptr" | "ptr64" ]
case LangAS::ptr32_sptr:
ASString = "ptr32_sptr";
Expand Down
8 changes: 7 additions & 1 deletion clang/lib/AST/MicrosoftMangle.cpp
Expand Up @@ -1798,7 +1798,7 @@ void MicrosoftCXXNameMangler::mangleAddressSpaceType(QualType T,
// where:
// <language_addr_space> ::= <OpenCL-addrspace> | <CUDA-addrspace>
// <OpenCL-addrspace> ::= "CL" [ "global" | "local" | "constant" |
// "private"| "generic" ]
// "private"| "generic" | "device" | "host" ]
// <CUDA-addrspace> ::= "CU" [ "device" | "constant" | "shared" ]
// Note that the above were chosen to match the Itanium mangling for this.
//
Expand All @@ -1823,6 +1823,12 @@ void MicrosoftCXXNameMangler::mangleAddressSpaceType(QualType T,
case LangAS::opencl_global:
Extra.mangleSourceName("_ASCLglobal");
break;
case LangAS::opencl_global_device:
Extra.mangleSourceName("_ASCLdevice");
break;
case LangAS::opencl_global_host:
Extra.mangleSourceName("_ASCLhost");
break;
case LangAS::opencl_local:
Extra.mangleSourceName("_ASCLlocal");
break;
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/AST/TypePrinter.cpp
Expand Up @@ -1564,6 +1564,8 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,

case attr::OpenCLPrivateAddressSpace:
case attr::OpenCLGlobalAddressSpace:
case attr::OpenCLGlobalDeviceAddressSpace:
case attr::OpenCLGlobalHostAddressSpace:
case attr::OpenCLLocalAddressSpace:
case attr::OpenCLConstantAddressSpace:
case attr::OpenCLGenericAddressSpace:
Expand Down Expand Up @@ -1866,6 +1868,10 @@ std::string Qualifiers::getAddrSpaceAsString(LangAS AS) {
return "__constant";
case LangAS::opencl_generic:
return "__generic";
case LangAS::opencl_global_device:
return "__global_device";
case LangAS::opencl_global_host:
return "__global_host";
case LangAS::cuda_device:
return "__device__";
case LangAS::cuda_constant:
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Basic/Targets/AMDGPU.cpp
Expand Up @@ -46,6 +46,8 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
Constant, // opencl_constant
Private, // opencl_private
Generic, // opencl_generic
Global, // opencl_global_device
Global, // opencl_global_host
Global, // cuda_device
Constant, // cuda_constant
Local, // cuda_shared
Expand All @@ -61,6 +63,8 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
Constant, // opencl_constant
Private, // opencl_private
Generic, // opencl_generic
Global, // opencl_global_device
Global, // opencl_global_host
Global, // cuda_device
Constant, // cuda_constant
Local, // cuda_shared
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Basic/Targets/NVPTX.h
Expand Up @@ -30,6 +30,8 @@ static const unsigned NVPTXAddrSpaceMap[] = {
0, // opencl_private
// FIXME: generic has to be added to the target
0, // opencl_generic
1, // opencl_global_device
1, // opencl_global_host
1, // cuda_device
4, // cuda_constant
3, // cuda_shared
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Basic/Targets/SPIR.h
Expand Up @@ -28,6 +28,8 @@ static const unsigned SPIRAddrSpaceMap[] = {
2, // opencl_constant
0, // opencl_private
4, // opencl_generic
5, // opencl_global_device
6, // opencl_global_host
0, // cuda_device
0, // cuda_constant
0, // cuda_shared
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Basic/Targets/TCE.h
Expand Up @@ -35,6 +35,8 @@ static const unsigned TCEOpenCLAddrSpaceMap[] = {
4, // opencl_local
5, // opencl_constant
0, // opencl_private
1, // opencl_global_device
1, // opencl_global_host
// FIXME: generic has to be added to the target
0, // opencl_generic
0, // cuda_device
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Basic/Targets/X86.h
Expand Up @@ -30,6 +30,8 @@ static const unsigned X86AddrSpaceMap[] = {
0, // opencl_constant
0, // opencl_private
0, // opencl_generic
0, // opencl_global_device
0, // opencl_global_host
0, // cuda_device
0, // cuda_constant
0, // cuda_shared
Expand Down
18 changes: 14 additions & 4 deletions clang/lib/CodeGen/CodeGenModule.cpp
Expand Up @@ -1324,10 +1324,18 @@ static void removeImageAccessQualifier(std::string& TyName) {
// (basically all single AS CPUs).
static unsigned ArgInfoAddressSpace(LangAS AS) {
switch (AS) {
case LangAS::opencl_global: return 1;
case LangAS::opencl_constant: return 2;
case LangAS::opencl_local: return 3;
case LangAS::opencl_generic: return 4; // Not in SPIR 2.0 specs.
case LangAS::opencl_global:
return 1;
case LangAS::opencl_constant:
return 2;
case LangAS::opencl_local:
return 3;
case LangAS::opencl_generic:
return 4; // Not in SPIR 2.0 specs.
case LangAS::opencl_global_device:
return 5;
case LangAS::opencl_global_host:
return 6;
default:
return 0; // Assume private.
}
Expand Down Expand Up @@ -3792,6 +3800,8 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) {
if (LangOpts.OpenCL) {
AddrSpace = D ? D->getType().getAddressSpace() : LangAS::opencl_global;
assert(AddrSpace == LangAS::opencl_global ||
AddrSpace == LangAS::opencl_global_device ||
AddrSpace == LangAS::opencl_global_host ||
AddrSpace == LangAS::opencl_constant ||
AddrSpace == LangAS::opencl_local ||
AddrSpace >= LangAS::FirstTargetAddressSpace);
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Sema/SemaType.cpp
Expand Up @@ -7968,6 +7968,8 @@ static bool isAddressSpaceKind(const ParsedAttr &attr) {
return attrKind == ParsedAttr::AT_AddressSpace ||
attrKind == ParsedAttr::AT_OpenCLPrivateAddressSpace ||
attrKind == ParsedAttr::AT_OpenCLGlobalAddressSpace ||
attrKind == ParsedAttr::AT_OpenCLGlobalDeviceAddressSpace ||
attrKind == ParsedAttr::AT_OpenCLGlobalHostAddressSpace ||
attrKind == ParsedAttr::AT_OpenCLLocalAddressSpace ||
attrKind == ParsedAttr::AT_OpenCLConstantAddressSpace ||
attrKind == ParsedAttr::AT_OpenCLGenericAddressSpace;
Expand Down Expand Up @@ -8048,6 +8050,8 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
break;
case ParsedAttr::AT_OpenCLPrivateAddressSpace:
case ParsedAttr::AT_OpenCLGlobalAddressSpace:
case ParsedAttr::AT_OpenCLGlobalDeviceAddressSpace:
case ParsedAttr::AT_OpenCLGlobalHostAddressSpace:
case ParsedAttr::AT_OpenCLLocalAddressSpace:
case ParsedAttr::AT_OpenCLConstantAddressSpace:
case ParsedAttr::AT_OpenCLGenericAddressSpace:
Expand Down
12 changes: 12 additions & 0 deletions clang/test/AST/language_address_space_attribute.cpp
Expand Up @@ -17,6 +17,18 @@ void langas() {
// CHECK: VarDecl {{.*}} z_global '__global int *'
[[clang::opencl_global]] int *z_global;

// CHECK: VarDecl {{.*}} x_global_device '__global_device int *'
__attribute__((opencl_global_device)) int *x_global_device;

// CHECK: VarDecl {{.*}} z_global_device '__global_device int *'
[[clang::opencl_global_device]] int *z_global_device;

// CHECK: VarDecl {{.*}} x_global_host '__global_host int *'
__attribute__((opencl_global_host)) int *x_global_host;

// CHECK: VarDecl {{.*}} z_global_host '__global_host int *'
[[clang::opencl_global_host]] int *z_global_host;

// CHECK: VarDecl {{.*}} x_local '__local int *'
__attribute__((opencl_local)) int *x_local;

Expand Down
12 changes: 12 additions & 0 deletions clang/test/CodeGenCXX/mangle-address-space.cpp
Expand Up @@ -43,6 +43,10 @@ void ocl_f0(char __private *p) { }

struct ocl_OpaqueType;
typedef ocl_OpaqueType __global * ocl_OpaqueTypePtr;
typedef ocl_OpaqueType __attribute__((opencl_global_host)) * ocl_OpaqueTypePtrH;
typedef ocl_OpaqueType
__attribute__((opencl_global_device)) *
ocl_OpaqueTypePtrD;

// CHECKOCL-LABEL: define {{.*}}void @_Z6ocl_f0PU8CLglobal14ocl_OpaqueType
// WINOCL-LABEL: define {{.*}}void @"?ocl_f0@@YAXPEAU?$_ASCLglobal@$$CAUocl_OpaqueType@@@__clang@@@Z"
Expand All @@ -61,4 +65,12 @@ __constant float *ocl_f1(char __generic const *p) { return 0;}
// CHECKOCL-LABEL: define {{.*}}float* @_Z6ocl_f2PU9CLgenericKc
// WINOCL-LABEL: define {{.*}}float* @"?ocl_f2@@YAPEAU?$_ASCLgeneric@$$CAM@__clang@@QEAU?$_ASCLgeneric@$$CBD@2@@Z"
__generic float *ocl_f2(__generic char const * const p) { return 0;}

// CHECKOCL-LABEL: define {{.*}}void @_Z6ocl_f3PU6CLhost14ocl_OpaqueType
// WINOCL-LABEL: define {{.*}}void @"?ocl_f3@@YAXPEAU?$_ASCLhost@$$CAUocl_OpaqueType@@@__clang@@@Z"
// Mangling probe: the unnamed parameter is a pointer to ocl_OpaqueType
// qualified with the opencl_global_host address space. The body is empty
// because only the emitted symbol name is checked.
void ocl_f3(ocl_OpaqueTypePtrH) {}

// CHECKOCL-LABEL: define {{.*}}void @_Z6ocl_f4PU8CLdevice14ocl_OpaqueType
// WINOCL-LABEL: define {{.*}}void @"?ocl_f4@@YAXPEAU?$_ASCLdevice@$$CAUocl_OpaqueType@@@__clang@@@Z"
// Mangling probe: the unnamed parameter is a pointer to ocl_OpaqueType
// qualified with the opencl_global_device address space. The body is empty
// because only the emitted symbol name is checked.
void ocl_f4(ocl_OpaqueTypePtrD) {}
#endif
28 changes: 27 additions & 1 deletion clang/test/CodeGenOpenCL/address-spaces-conversions.cl
Expand Up @@ -6,7 +6,9 @@
// pointers to different address spaces

// CHECK: define void @test
void test(global int *arg_glob, generic int *arg_gen) {
void test(global int *arg_glob, generic int *arg_gen,
__attribute__((opencl_global_device)) int *arg_device,
__attribute__((opencl_global_host)) int *arg_host) {
int var_priv;
arg_gen = arg_glob; // implicit cast global -> generic
// CHECK: %{{[0-9]+}} = addrspacecast i32 addrspace(1)* %{{[0-9]+}} to i32 addrspace(4)*
Expand Down Expand Up @@ -39,6 +41,30 @@ void test(global int *arg_glob, generic int *arg_gen) {
// CHECK-NOT: bitcast
// CHECK-NOFAKE: bitcast
// CHECK-NOFAKE-NOT: addrspacecast

arg_glob = arg_device; // implicit cast
// CHECK: addrspacecast
// CHECK-NOFAKE-NOT: addrspacecast

arg_glob = arg_host; // implicit cast
// CHECK: addrspacecast
// CHECK-NOFAKE-NOT: addrspacecast

arg_glob = (global int *)arg_device; // explicit cast
// CHECK: addrspacecast
// CHECK-NOFAKE-NOT: addrspacecast

arg_glob = (global int *)arg_host; // explicit cast
// CHECK: addrspacecast
// CHECK-NOFAKE-NOT: addrspacecast

arg_device = (__attribute((opencl_global_device)) int *)arg_glob; // explicit cast
// CHECK: addrspacecast
// CHECK-NOFAKE-NOT: addrspacecast

arg_host = (__attribute((opencl_global_host)) int *)arg_glob; // explicit cast
// CHECK: addrspacecast
// CHECK-NOFAKE-NOT: addrspacecast
}

// Test ternary operator.
Expand Down

0 comments on commit 8d27be8

Please sign in to comment.