Skip to content

Commit

Permalink
Merging r323909:
Browse files Browse the repository at this point in the history
------------------------------------------------------------------------
r323909 | mareko | 2018-01-31 21:18:11 +0100 (Wed, 31 Jan 2018) | 13 lines

AMDGPU: Fold inline offset for loads properly in moveToVALU on GFX9

Summary:
This enables load merging into x2, x4, which is driven by inline offsets.

6500 shaders are affected:
Code Size in affected shaders: -15.14 %

Reviewers: arsenm, nhaehnle

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D42078
------------------------------------------------------------------------

llvm-svn: 324089
  • Loading branch information
zmodem committed Feb 2, 2018
1 parent 9f3da91 commit 3b724f6
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 37 deletions.
53 changes: 31 additions & 22 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Expand Up @@ -3756,36 +3756,45 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
// FIXME: This isn't safe because the addressing mode doesn't work
// correctly if vaddr is negative.
//
// FIXME: Handle v_add_u32 and VOP3 form. Also don't rely on immediate
// being in src0.
//
// FIXME: Should probably be done somewhere else, maybe SIFoldOperands.
//
// See if we can extract an immediate offset by recognizing one of these:
// V_ADD_I32_e32 dst, imm, src1
// V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1
// V_ADD will be removed by "Remove dead machine instructions".
if (Add && Add->getOpcode() == AMDGPU::V_ADD_I32_e32) {
const MachineOperand *Src =
getNamedOperand(*Add, AMDGPU::OpName::src0);

if (Src->isReg()) {
auto Mov = MRI.getUniqueVRegDef(Src->getReg());
if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
Src = &Mov->getOperand(1);
}
if (Add &&
(Add->getOpcode() == AMDGPU::V_ADD_I32_e32 ||
Add->getOpcode() == AMDGPU::V_ADD_U32_e64)) {
static const unsigned SrcNames[2] = {
AMDGPU::OpName::src0,
AMDGPU::OpName::src1,
};

// Find a literal offset in one of the source operands.
for (int i = 0; i < 2; i++) {
const MachineOperand *Src =
getNamedOperand(*Add, SrcNames[i]);

if (Src->isReg()) {
auto Mov = MRI.getUniqueVRegDef(Src->getReg());
if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
Src = &Mov->getOperand(1);
}

if (Src) {
if (Src->isImm())
Offset = Src->getImm();
else if (Src->isCImm())
Offset = Src->getCImm()->getZExtValue();
}

if (Offset && isLegalMUBUFImmOffset(Offset)) {
VAddr = getNamedOperand(*Add, SrcNames[!i]);
break;
}

if (Src) {
if (Src->isImm())
Offset = Src->getImm();
else if (Src->isCImm())
Offset = Src->getCImm()->getZExtValue();
}

if (Offset && isLegalMUBUFImmOffset(Offset))
VAddr = getNamedOperand(*Add, AMDGPU::OpName::src1);
else
Offset = 0;
}
}

BuildMI(*MBB, Inst, Inst.getDebugLoc(),
Expand Down
18 changes: 3 additions & 15 deletions llvm/test/CodeGen/AMDGPU/smrd.ll
Expand Up @@ -194,11 +194,7 @@ main_body:

; GCN-LABEL: {{^}}smrd_vgpr_offset_imm:
; GCN-NEXT: %bb.

; SICIVI-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;

; GFX9-NEXT: v_add_u32_e32 [[ADD:v[0-9]+]], 0xfff, v0
; GFX9-NEXT: buffer_load_dword v{{[0-9]}}, [[ADD]], s[0:3], 0 offen ;
; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;
define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) #0 {
main_body:
%off = add i32 %offset, 4095
Expand Down Expand Up @@ -244,16 +240,8 @@ main_body:

; GCN-LABEL: {{^}}smrd_vgpr_merged:
; GCN-NEXT: %bb.

; SICIVI-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
; SICIVI-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28

; GFX9: buffer_load_dword
; GFX9: buffer_load_dword
; GFX9: buffer_load_dword
; GFX9: buffer_load_dword
; GFX9: buffer_load_dword
; GFX9: buffer_load_dword
; GCN-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
; GCN-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
define amdgpu_ps void @smrd_vgpr_merged(<4 x i32> inreg %desc, i32 %a) #0 {
main_body:
%a1 = add i32 %a, 4
Expand Down

0 comments on commit 3b724f6

Please sign in to comment.