Skip to content

Commit d8f7ea3

Browse files
committed
AMDGPU: Enable FeatureFlatForGlobal on Volcanic Islands
Accomplishes what r292982 was supposed to do; that revision ended up making only the necessary test changes. This should be applied to the 4.0 branch. Patch by Vedran Miletić <vedran@miletic.net>. llvm-svn: 293310
1 parent 3650df1 commit d8f7ea3

File tree

5 files changed

+63
-37
lines changed

5 files changed

+63
-37
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -305,12 +305,6 @@ def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
305305
"Enable SI Machine Scheduler"
306306
>;
307307

308-
def FeatureNoAddr64 : SubtargetFeature<"mubuf-no-addr64",
309-
"NoAddr64",
310-
"true",
311-
"MUBUF instructions have addr64 bit"
312-
>;
313-
314308
// Unless +-flat-for-global is specified, turn on FlatForGlobal for
315309
// all OS-es on VI and newer hardware to avoid assertion failures due
316310
// to missing ADDR64 variants of MUBUF instructions.
@@ -320,8 +314,7 @@ def FeatureNoAddr64 : SubtargetFeature<"mubuf-no-addr64",
320314
def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
321315
"FlatForGlobal",
322316
"true",
323-
"Force to generate flat instruction for global",
324-
[FeatureNoAddr64]
317+
"Force to generate flat instruction for global"
325318
>;
326319

327320
// Dummy feature used to disable assembler instructions.
@@ -374,7 +367,7 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
374367
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
375368
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
376369
FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA,
377-
FeatureDPP, FeatureNoAddr64
370+
FeatureDPP
378371
]
379372
>;
380373

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,13 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
4949

5050
ParseSubtargetFeatures(GPU, FullFS);
5151

52+
// Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
53+
// on VI and newer hardware to avoid assertion failures due to missing ADDR64
54+
// variants of MUBUF instructions.
55+
if (!hasAddr64() && !FS.contains("flat-for-global")) {
56+
FlatForGlobal = true;
57+
}
58+
5259
// FIXME: I don't think Evergreen has any useful support for
5360
// denormals, but should be checked. Should we issue a warning somewhere
5461
// if someone tries to enable these?
@@ -82,7 +89,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
8289
FP64FP16Denormals(false),
8390
FPExceptions(false),
8491
FlatForGlobal(false),
85-
NoAddr64(false),
8692
UnalignedScratchAccess(false),
8793
UnalignedBufferAccess(false),
8894

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
8585
bool FP64FP16Denormals;
8686
bool FPExceptions;
8787
bool FlatForGlobal;
88-
bool NoAddr64;
8988
bool UnalignedScratchAccess;
9089
bool UnalignedBufferAccess;
9190
bool EnableXNACK;

llvm/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll

Lines changed: 0 additions & 26 deletions
This file was deleted.
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
2+
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
3+
; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s
4+
; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
5+
; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
6+
; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s
7+
8+
9+
; There are no stack objects even though flat is used by default, so
10+
; flat_scratch_init should be disabled.
11+
12+
; ALL-LABEL: {{^}}test:
13+
; HSA: .amd_kernel_code_t
14+
; HSA: enable_sgpr_flat_scratch_init = 0
15+
; HSA: .end_amd_kernel_code_t
16+
17+
; ALL-NOT: flat_scr
18+
19+
; HSA-DEFAULT: flat_store_dword
20+
; HSA-NODEFAULT: buffer_store_dword
21+
; HSA-NOADDR64: flat_store_dword
22+
23+
; NOHSA-DEFAULT: buffer_store_dword
24+
; NOHSA-NODEFAULT: flat_store_dword
25+
; NOHSA-NOADDR64: flat_store_dword
26+
define void @test(i32 addrspace(1)* %out) {
27+
entry:
28+
store i32 0, i32 addrspace(1)* %out
29+
ret void
30+
}
31+
32+
; HSA-DEFAULT: flat_store_dword
33+
; HSA-NODEFAULT: buffer_store_dword
34+
; HSA-NOADDR64: flat_store_dword
35+
36+
; NOHSA-DEFAULT: buffer_store_dword
37+
; NOHSA-NODEFAULT: flat_store_dword
38+
; NOHSA-NOADDR64: flat_store_dword
39+
define void @test_addr64(i32 addrspace(1)* %out) {
40+
entry:
41+
%out.addr = alloca i32 addrspace(1)*, align 4
42+
43+
store i32 addrspace(1)* %out, i32 addrspace(1)** %out.addr, align 4
44+
%ld0 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
45+
46+
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %ld0, i32 0
47+
store i32 1, i32 addrspace(1)* %arrayidx, align 4
48+
49+
%ld1 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
50+
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %ld1, i32 1
51+
store i32 2, i32 addrspace(1)* %arrayidx1, align 4
52+
53+
ret void
54+
}

0 commit comments

Comments
 (0)