Skip to content

Commit

Permalink
[amdgpu] Change LDS lowering default to hybrid
Browse files Browse the repository at this point in the history
This change was postponed from D139433 until the bug addressed by D139874 was resolved.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D141852
  • Loading branch information
JonChesterfield committed Feb 24, 2023
1 parent 8e2f838 commit bf579a7
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 11 deletions.
12 changes: 12 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Expand Up @@ -1329,6 +1329,18 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = G->getGlobal();

if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
if (!MFI->isModuleEntryFunction()) {
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
if (AMDGPUMachineFunction::isKnownAddressLDSGlobal(*GVar)) {
unsigned Offset =
AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(*GVar);
return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
}
}
}
}

if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
if (!MFI->isModuleEntryFunction() &&
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
Expand Up @@ -162,7 +162,7 @@ enum class LoweringKind { module, table, kernel, hybrid };
cl::opt<LoweringKind> LoweringKindLoc(
"amdgpu-lower-module-lds-strategy",
cl::desc("Specify lowering strategy for function LDS access:"), cl::Hidden,
-cl::init(LoweringKind::module),
+cl::init(LoweringKind::hybrid),
cl::values(
clEnumValN(LoweringKind::table, "table", "Lower via table lookup"),
clEnumValN(LoweringKind::module, "module", "Lower via module struct"),
Expand Down
18 changes: 8 additions & 10 deletions llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
Expand Up @@ -30,13 +30,12 @@ define void @f0() {
; GCN-LABEL: f0:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_mov_b64 s[0:1], s[6:7]
-; GCN-NEXT: s_trap 2
+; GCN-NEXT: v_mov_b32_e32 v0, 0
 ; GCN-NEXT: s_mov_b32 m0, -1
-; GCN-NEXT: ds_read_b32 v0, v0
+; GCN-NEXT: ds_read_b32 v1, v0 offset:4
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_add_f32_e32 v0, v0, v0
-; GCN-NEXT: ds_write_b32 v0, v0
+; GCN-NEXT: v_add_f32_e32 v1, v1, v1
+; GCN-NEXT: ds_write_b32 v0, v1 offset:4
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
%ld = load float, ptr addrspace(3) @v0
Expand Down Expand Up @@ -120,13 +119,12 @@ define void @f3() {
; GCN-LABEL: f3:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_mov_b64 s[0:1], s[6:7]
-; GCN-NEXT: s_trap 2
+; GCN-NEXT: v_mov_b32_e32 v0, 0
 ; GCN-NEXT: s_mov_b32 m0, -1
-; GCN-NEXT: ds_read_u8 v0, v0
+; GCN-NEXT: ds_read_u8 v1, v0 offset:8
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_mul_lo_u32 v0, v0, 5
-; GCN-NEXT: ds_write_b8 v0, v0
+; GCN-NEXT: v_mul_lo_u32 v1, v1, 5
+; GCN-NEXT: ds_write_b8 v0, v1 offset:8
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
%ld = load i8, ptr addrspace(3) @v3
Expand Down

0 comments on commit bf579a7

Please sign in to comment.