Skip to content

Commit

Permalink
[AMDGPU] Add an experimental buffer fat pointer address space.
Browse files Browse the repository at this point in the history
Add an experimental buffer fat pointer address space that is currently
unhandled in the backend. This commit reserves address space 7 as a
non-integral pointer representing the 160-bit fat pointer (128-bit buffer
descriptor + 32-bit offset) that is heavily used in graphics workloads
using the AMDGPU backend.

Differential Revision: https://reviews.llvm.org/D58957

llvm-svn: 356373
  • Loading branch information
Neil Henning committed Mar 18, 2019
1 parent 6063393 commit 523dab0
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 26 deletions.
14 changes: 11 additions & 3 deletions llvm/docs/AMDGPUUsage.rst
Expand Up @@ -281,17 +281,25 @@ LLVM Address Space number is used throughout LLVM (for example, in LLVM IR).
.. table:: Address Space Mapping
:name: amdgpu-address-space-mapping-table

================== =================
================== =================================
LLVM Address Space Memory Space
================== =================
================== =================================
0 Generic (Flat)
1 Global
2 Region (GDS)
3 Local (group/LDS)
4 Constant
5 Private (Scratch)
6 Constant 32-bit
================== =================
7 Buffer Fat Pointer (experimental)
================== =================================

The buffer fat pointer is an experimental address space that is currently
unsupported in the backend. It exposes a non-integral pointer that is intended,
in future, to support the modelling of 128-bit buffer descriptors plus a 32-bit
offset into the buffer descriptor (in total encapsulating a 160-bit 'pointer'),
allowing us to use normal LLVM load/store/atomic operations to model the buffer
descriptors used heavily in graphics workloads targeting the backend.

.. _amdgpu-memory-scopes:

Expand Down
12 changes: 7 additions & 5 deletions llvm/lib/Target/AMDGPU/AMDGPU.h
Expand Up @@ -245,21 +245,23 @@ enum TargetIndex {
namespace AMDGPUAS {
enum : unsigned {
// The maximum value for flat, generic, local, private, constant and region.
MAX_AMDGPU_ADDRESS = 6,
MAX_AMDGPU_ADDRESS = 7,

FLAT_ADDRESS = 0, ///< Address space for flat memory.
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
REGION_ADDRESS = 2, ///< Address space for region memory. (GDS)

CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2).
LOCAL_ADDRESS = 3, ///< Address space for local memory.
PRIVATE_ADDRESS = 5, ///< Address space for private memory.

CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory
CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory.

/// Address space for direct addressible parameter memory (CONST0)
BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers.

/// Address space for direct addressible parameter memory (CONST0).
PARAM_D_ADDRESS = 6,
/// Address space for indirect addressible parameter memory (VTX1)
/// Address space for indirect addressible parameter memory (VTX1).
PARAM_I_ADDRESS = 7,

// Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on
Expand Down
23 changes: 12 additions & 11 deletions llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
Expand Up @@ -53,20 +53,21 @@ void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}

// These arrays are indexed by address space value enum elements 0 ... to 6
static const AliasResult ASAliasRules[7][7] = {
/* Flat Global Region Group Constant Private Constant 32-bit */
/* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
/* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias},
/* Region */ {MayAlias, NoAlias , NoAlias , NoAlias, MayAlias, NoAlias , MayAlias},
/* Group */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias , NoAlias},
/* Constant */ {MayAlias, MayAlias, MayAlias, NoAlias , NoAlias, NoAlias , MayAlias},
/* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
/* Constant 32-bit */ {MayAlias, MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , NoAlias}
// These arrays are indexed by address space value enum elements 0 ... to 7
static const AliasResult ASAliasRules[8][8] = {
/* Flat Global Region Group Constant Private Constant 32-bit Buffer Fat Ptr */
/* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
/* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, MayAlias},
/* Region */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, NoAlias},
/* Group */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias , NoAlias , NoAlias},
/* Constant */ {MayAlias, MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, MayAlias},
/* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, NoAlias , NoAlias},
/* Constant 32-bit */ {MayAlias, MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , NoAlias , MayAlias},
/* Buffer Fat Ptr */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, MayAlias}
};

static AliasResult getAliasResult(unsigned AS1, unsigned AS2) {
static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 6, "Addr space out of range");
static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 7, "Addr space out of range");

if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS)
return MayAlias;
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Expand Up @@ -295,10 +295,11 @@ static StringRef computeDataLayout(const Triple &TT) {
}

// 32-bit private, local, and region pointers. 64-bit global, constant and
// flat.
// flat, non-integral buffer fat pointers.
return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
"-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
"-ni:7";
}

LLVM_READNONE
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
Expand Up @@ -253,7 +253,8 @@ unsigned GCNTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
AddrSpace == AMDGPUAS::BUFFER_FAT_POINTER) {
return 512;
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Expand Up @@ -1046,7 +1046,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return isLegalGlobalAddressingMode(AM);

if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
// If the offset isn't a multiple of 4, it probably isn't going to be
// correctly aligned.
// FIXME: Can we get the real alignment here?
Expand Down
40 changes: 40 additions & 0 deletions llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll
Expand Up @@ -50,3 +50,43 @@ define void @test_1_999(i8 addrspace(1)* %p, i8 addrspace(999)* %p1) {
define void @test_999_1(i8 addrspace(999)* %p, i8 addrspace(1)* %p1) {
ret void
}

; CHECK: MayAlias: i8 addrspace(7)* %p, i8* %p1
define void @test_7_0(i8 addrspace(7)* %p, i8 addrspace(0)* %p1) {
ret void
}

; CHECK: MayAlias: i8 addrspace(1)* %p1, i8 addrspace(7)* %p
define void @test_7_1(i8 addrspace(7)* %p, i8 addrspace(1)* %p1) {
ret void
}

; CHECK: NoAlias: i8 addrspace(2)* %p1, i8 addrspace(7)* %p
define void @test_7_2(i8 addrspace(7)* %p, i8 addrspace(2)* %p1) {
ret void
}

; CHECK: NoAlias: i8 addrspace(3)* %p1, i8 addrspace(7)* %p
define void @test_7_3(i8 addrspace(7)* %p, i8 addrspace(3)* %p1) {
ret void
}

; CHECK: MayAlias: i8 addrspace(4)* %p1, i8 addrspace(7)* %p
define void @test_7_4(i8 addrspace(7)* %p, i8 addrspace(4)* %p1) {
ret void
}

; CHECK: NoAlias: i8 addrspace(5)* %p1, i8 addrspace(7)* %p
define void @test_7_5(i8 addrspace(7)* %p, i8 addrspace(5)* %p1) {
ret void
}

; CHECK: MayAlias: i8 addrspace(6)* %p1, i8 addrspace(7)* %p
define void @test_7_6(i8 addrspace(7)* %p, i8 addrspace(6)* %p1) {
ret void
}

; CHECK: MayAlias: i8 addrspace(7)* %p, i8 addrspace(7)* %p1
define void @test_7_7(i8 addrspace(7)* %p, i8 addrspace(7)* %p1) {
ret void
}
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll
@@ -1,7 +1,6 @@
; RUN: opt -mtriple=r600-- -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s

; CHECK: MayAlias: i8 addrspace(5)* %p, i8 addrspace(7)* %p1

define amdgpu_kernel void @test(i8 addrspace(5)* %p, i8 addrspace(7)* %p1) {
; CHECK: MayAlias: i8 addrspace(5)* %p, i8 addrspace(999)* %p1
define amdgpu_kernel void @test(i8 addrspace(5)* %p, i8 addrspace(999)* %p1) {
ret void
}
17 changes: 17 additions & 0 deletions llvm/test/CodeGen/AMDGPU/vectorize-buffer-fat-pointer.ll
@@ -0,0 +1,17 @@
; RUN: opt -S -mtriple=amdgcn-- -load-store-vectorizer < %s | FileCheck -check-prefix=OPT %s

; OPT-LABEL: @func(
define void @func(i32 addrspace(7)* %out) {
entry:
%a0 = getelementptr i32, i32 addrspace(7)* %out, i32 0
%a1 = getelementptr i32, i32 addrspace(7)* %out, i32 1
%a2 = getelementptr i32, i32 addrspace(7)* %out, i32 2
%a3 = getelementptr i32, i32 addrspace(7)* %out, i32 3

; OPT: store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> addrspace(7)* %0, align 4
store i32 0, i32 addrspace(7)* %a0
store i32 1, i32 addrspace(7)* %a1
store i32 2, i32 addrspace(7)* %a2
store i32 3, i32 addrspace(7)* %a3
ret void
}

0 comments on commit 523dab0

Please sign in to comment.