Skip to content

Commit

Permalink
AMDGPU: Enable FeatureFlatForGlobal on Volcanic Islands
Browse files Browse the repository at this point in the history
Accomplishes what r292982 was supposed to do; that commit ended up
making only the necessary test changes.

This should be applied to the 4.0 branch.

Patch by Vedran Miletić <vedran@miletic.net>

llvm-svn: 293310
  • Loading branch information
arsenm committed Jan 27, 2017
1 parent 3650df1 commit d8f7ea3
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 37 deletions.
11 changes: 2 additions & 9 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -305,12 +305,6 @@ def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
"Enable SI Machine Scheduler"
>;

def FeatureNoAddr64 : SubtargetFeature<"mubuf-no-addr64",
"NoAddr64",
"true",
"MUBUF instructions have addr64 bit"
>;

// Unless +-flat-for-global is specified, turn on FlatForGlobal for
// all OS-es on VI and newer hardware to avoid assertion failures due
// to missing ADDR64 variants of MUBUF instructions.
Expand All @@ -320,8 +314,7 @@ def FeatureNoAddr64 : SubtargetFeature<"mubuf-no-addr64",
def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
"FlatForGlobal",
"true",
"Force to generate flat instruction for global",
[FeatureNoAddr64]
"Force to generate flat instruction for global"
>;

// Dummy feature used to disable assembler instructions.
Expand Down Expand Up @@ -374,7 +367,7 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA,
FeatureDPP, FeatureNoAddr64
FeatureDPP
]
>;

Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,

ParseSubtargetFeatures(GPU, FullFS);

// Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
// on VI and newer hardware to avoid assertion failures due to missing ADDR64
// variants of MUBUF instructions.
if (!hasAddr64() && !FS.contains("flat-for-global")) {
FlatForGlobal = true;
}

// FIXME: I don't think Evergreen has any useful support for
// denormals, but should be checked. Should we issue a warning somewhere
// if someone tries to enable these?
Expand Down Expand Up @@ -82,7 +89,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FP64FP16Denormals(false),
FPExceptions(false),
FlatForGlobal(false),
NoAddr64(false),
UnalignedScratchAccess(false),
UnalignedBufferAccess(false),

Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
bool FP64FP16Denormals;
bool FPExceptions;
bool FlatForGlobal;
bool NoAddr64;
bool UnalignedScratchAccess;
bool UnalignedBufferAccess;
bool EnableXNACK;
Expand Down
26 changes: 0 additions & 26 deletions llvm/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll

This file was deleted.

54 changes: 54 additions & 0 deletions llvm/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s
; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s


; There are no stack objects even though flat is used by default, so
; flat_scratch_init should be disabled.

; ALL-LABEL: {{^}}test:
; HSA: .amd_kernel_code_t
; HSA: enable_sgpr_flat_scratch_init = 0
; HSA: .end_amd_kernel_code_t

; ALL-NOT: flat_scr

; HSA-DEFAULT: flat_store_dword
; HSA-NODEFAULT: buffer_store_dword
; HSA-NOADDR64: flat_store_dword

; NOHSA-DEFAULT: buffer_store_dword
; NOHSA-NODEFAULT: flat_store_dword
; NOHSA-NOADDR64: flat_store_dword
; Minimal case: a single i32 store of 0 through the addrspace(1) pointer
; argument. The check lines preceding this function expect that store to be
; emitted as either flat_store_dword or buffer_store_dword, depending on which
; llc invocation produced the output.
define void @test(i32 addrspace(1)* %out) {
entry:
store i32 0, i32 addrspace(1)* %out
ret void
}

; HSA-DEFAULT: flat_store_dword
; HSA-NODEFAULT: buffer_store_dword
; HSA-NOADDR64: flat_store_dword

; NOHSA-DEFAULT: buffer_store_dword
; NOHSA-NODEFAULT: flat_store_dword
; NOHSA-NOADDR64: flat_store_dword
; Writes 1 to element 0 and 2 to element 1 of the i32 array that %out points
; to. The pointer is not used directly: it is first stored into an alloca'd
; slot and then reloaded from that slot before each write.
define void @test_addr64(i32 addrspace(1)* %out) {
entry:
; Local slot that holds a copy of the incoming pointer.
%out.addr = alloca i32 addrspace(1)*, align 4

; Save the argument into the slot, then read it back for the first write.
store i32 addrspace(1)* %out, i32 addrspace(1)** %out.addr, align 4
%ld0 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4

; First write: element index 0 receives the value 1.
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %ld0, i32 0
store i32 1, i32 addrspace(1)* %arrayidx, align 4

; Second write: reload the pointer, then element index 1 receives the value 2.
%ld1 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %ld1, i32 1
store i32 2, i32 addrspace(1)* %arrayidx1, align 4

ret void
}

0 comments on commit d8f7ea3

Please sign in to comment.