Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,7 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
case Sgpr128:
case Vgpr128:
return LLT::scalar(128);
case SgprP0:
case VgprP0:
return LLT::pointer(0, 64);
case SgprP1:
Expand All @@ -855,6 +856,8 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
case SgprP5:
case VgprP5:
return LLT::pointer(5, 32);
case SgprP8:
return LLT::pointer(8, 128);
case SgprV2S16:
case VgprV2S16:
case UniInVgprV2S16:
Expand Down Expand Up @@ -940,10 +943,12 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
case Sgpr32_WF:
case Sgpr64:
case Sgpr128:
case SgprP0:
case SgprP1:
case SgprP3:
case SgprP4:
case SgprP5:
case SgprP8:
case SgprPtr32:
case SgprPtr64:
case SgprPtr128:
Expand Down Expand Up @@ -1022,10 +1027,12 @@ void RegBankLegalizeHelper::applyMappingDst(
case Sgpr32:
case Sgpr64:
case Sgpr128:
case SgprP0:
case SgprP1:
case SgprP3:
case SgprP4:
case SgprP5:
case SgprP8:
case SgprV2S16:
case SgprV2S32:
case SgprV4S32:
Expand Down Expand Up @@ -1163,10 +1170,12 @@ void RegBankLegalizeHelper::applyMappingSrc(
case Sgpr32:
case Sgpr64:
case Sgpr128:
case SgprP0:
case SgprP1:
case SgprP3:
case SgprP4:
case SgprP5:
case SgprP8:
case SgprV2S16:
case SgprV2S32:
case SgprV4S32: {
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
return MRI.getType(Reg) == LLT::pointer(4, 64);
case P5:
return MRI.getType(Reg) == LLT::pointer(5, 32);
case P8:
return MRI.getType(Reg) == LLT::pointer(8, 128);
case Ptr32:
return isAnyPtr(MRI.getType(Reg), 32);
case Ptr64:
Expand Down Expand Up @@ -108,6 +110,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
case UniP5:
return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
case UniP8:
return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniform(Reg);
case UniPtr32:
return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
case UniPtr64:
Expand Down Expand Up @@ -903,6 +907,15 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,

addRulesForGOpcs({G_READSTEADYCOUNTER}, Standard).Uni(S64, {{Sgpr64}, {}});

addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});

addRulesForGOpcs({G_GLOBAL_VALUE})
.Any({{UniP0}, {{SgprP0}, {}}})
.Any({{UniP1}, {{SgprP1}, {}}})
.Any({{UniP3}, {{SgprP3}, {}}})
.Any({{UniP4}, {{SgprP4}, {}}})
.Any({{UniP8}, {{SgprP8}, {}}});

bool hasSALUFloat = ST->hasSALUFloatInsts();

addRulesForGOpcs({G_FADD}, Standard)
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ enum UniformityLLTOpPredicateID {
P3,
P4,
P5,
P8,
Ptr32,
Ptr64,
Ptr128,
Expand All @@ -72,6 +73,7 @@ enum UniformityLLTOpPredicateID {
UniP3,
UniP4,
UniP5,
UniP8,
UniPtr32,
UniPtr64,
UniPtr128,
Expand Down Expand Up @@ -134,10 +136,12 @@ enum RegBankLLTMappingApplyID {
Sgpr32,
Sgpr64,
Sgpr128,
SgprP0,
SgprP1,
SgprP3,
SgprP4,
SgprP5,
SgprP8,
SgprPtr32,
SgprPtr64,
SgprPtr128,
Expand Down
104 changes: 104 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/global-value-addrspaces.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s

@flat = external global i32, align 4
@global = external addrspace(1) global i32, align 4
@lds = addrspace(3) global i32 poison, align 4
@constant = external addrspace(4) constant i32, align 4
@buf = external addrspace(8) global i8

; Returns the address of the external address-space-0 global @flat.
; The expected output builds a PC-relative GOT address (s_getpc_b64 plus the
; gotpcrel32 lo/hi relocations), loads the 64-bit pointer from it with
; s_load_dwordx2, and copies s[4:5] into v0/v1 before s_setpc_b64.
define ptr @global_value_as0_external() {
; GCN-LABEL: global_value_as0_external:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, flat@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, flat@gotpcrel32@hi+12
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: v_mov_b32_e32 v1, s5
; GCN-NEXT: s_setpc_b64 s[30:31]
ret ptr @flat
}

; Returns the address of the external addrspace(1) global @global.
; The expected output has the same shape as the address-space-0 case: a
; PC-relative GOT address, an s_load_dwordx2 of the 64-bit pointer, and
; copies of s[4:5] into v0/v1 before s_setpc_b64.
define ptr addrspace(1) @global_value_as1_external() {
; GCN-LABEL: global_value_as1_external:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, global@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, global@gotpcrel32@hi+12
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: v_mov_b32_e32 v1, s5
; GCN-NEXT: s_setpc_b64 s[30:31]
ret ptr addrspace(1) @global
}

; Returns the address of the external addrspace(4) constant @constant.
; The expected output again resolves the symbol through a PC-relative GOT
; entry, loads the 64-bit pointer with s_load_dwordx2, and copies s[4:5]
; into v0/v1 before s_setpc_b64.
define ptr addrspace(4) @global_value_as4_external() {
; GCN-LABEL: global_value_as4_external:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, constant@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, constant@gotpcrel32@hi+12
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: v_mov_b32_e32 v1, s5
; GCN-NEXT: s_setpc_b64 s[30:31]
ret ptr addrspace(4) @constant
}

; Kernel that converts the address of the addrspace(3) global @lds to an i32
; and stores it to the addrspace(1) pointer %out.
; The expected output contains no relocation for @lds: a single v0 holding
; constant 0 is used both as the stored value and as the store's offset
; operand, with the base pointer loaded into s[0:1].
define amdgpu_kernel void @global_value_as3_lds_kernel(ptr addrspace(1) %out) {
; GCN-LABEL: global_value_as3_lds_kernel:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_store_dword v0, v0, s[0:1]
; GCN-NEXT: s_endpgm
%addr = ptrtoint ptr addrspace(3) @lds to i32
store i32 %addr, ptr addrspace(1) %out
ret void
}

; Stores %val with the raw ptr buffer store intrinsic, passing the external
; addrspace(8) global @buf directly as the buffer resource operand.
; The expected output resolves @buf through a PC-relative GOT entry and loads
; four dwords (s_load_dwordx4) into s[4:7], which buffer_store_dword then
; uses as its resource operand.
define void @global_value_as8_buffer_store(i32 %val) {
; GCN-LABEL: global_value_as8_buffer_store:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_getpc_b64 s[8:9]
; GCN-NEXT: s_add_u32 s8, s8, buf@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s9, s9, buf@gotpcrel32@hi+12
; GCN-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %val, ptr addrspace(8) @buf, i32 0, i32 0, i32 0)
ret void
}

; Loads an i32 with the raw ptr buffer load intrinsic, passing the external
; addrspace(8) global @buf as the buffer resource and %offset as the offset.
; The expected output resolves @buf through a PC-relative GOT entry, loads
; four dwords (s_load_dwordx4) into s[4:7], and issues buffer_load_dword
; with v0 as the "offen" offset and as the destination register.
define i32 @global_value_as8_buffer_load(i32 %offset) {
; GCN-LABEL: global_value_as8_buffer_load:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_getpc_b64 s[8:9]
; GCN-NEXT: s_add_u32 s8, s8, buf@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s9, s9, buf@gotpcrel32@hi+12
; GCN-NEXT: s_load_dwordx4 s[4:7], s[8:9], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
%val = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) @buf, i32 %offset, i32 0, i32 0)
ret i32 %val
}

declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture writeonly, i32, i32, i32 immarg) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) nocapture readonly, i32, i32, i32 immarg) #1

attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) }
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -amdgpu-enable-lower-module-lds=0 -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -amdgpu-enable-lower-module-lds=0 -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s
; FIXME: Merge with DAG test

@lds.external = external unnamed_addr addrspace(3) global [0 x i32]
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -global-isel -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -global-isel -new-reg-bank-select -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel -new-reg-bank-select -stop-after=instruction-select -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s

; RUN: not llc -mtriple=amdgcn -mcpu=tahiti -global-isel < %s 2>&1 | FileCheck %s
; RUN: not llc -mtriple=amdgcn -mcpu=tonga -global-isel < %s 2>&1 | FileCheck %s
; RUN: not llc -mtriple=amdgcn -mcpu=tahiti -global-isel -new-reg-bank-select < %s 2>&1 | FileCheck %s
; RUN: not llc -mtriple=amdgcn -mcpu=tonga -global-isel -new-reg-bank-select < %s 2>&1 | FileCheck %s

; CHECK: error: lds: unsupported initializer for address space

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -O0 -march amdgcn -mcpu=fiji -run-pass=regbankselect %s -o - | FileCheck %s
# RUN: llc -O0 -march amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s

--- |

Expand Down
Loading