Skip to content

Commit

Permalink
AMDGPU/GlobalISel: Change intrinsic ID for _L to _LZ opt
Browse files Browse the repository at this point in the history
The other case still needs to be changed to switch the opcode this way.
  • Loading branch information
arsenm committed Apr 1, 2020
1 parent f08df46 commit 5e4e8d0
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 34 deletions.
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
Expand Up @@ -55,6 +55,9 @@ struct ImageDimIntrinsicInfo {
};
const ImageDimIntrinsicInfo *getImageDimIntrinsicInfo(unsigned Intr);

/// Look up the image-dim intrinsic table entry for a given base opcode and
/// dimension.
///
/// \param BaseOpcode base opcode to search for (compared against the entry's
///        BaseOpcode field).
/// \param Dim dimension to search for (compared against the entry's Dim
///        field).
/// \returns a pointer to the matching ImageDimIntrinsicInfo entry.
///
/// NOTE(review): presumably returns nullptr when no entry matches, as the
/// other table lookups here are null-checked by callers -- confirm against
/// the generated searchable-table code.
/// NOTE: the "Instrinsic" spelling is shared with the SearchIndex def of the
/// same name in MIMGInstructions.td; rename both together if it is ever fixed.
const ImageDimIntrinsicInfo *getImageDimInstrinsicByBaseOpcode(unsigned BaseOpcode,
unsigned Dim);

} // end AMDGPU namespace
} // End llvm namespace

Expand Down
21 changes: 14 additions & 7 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Expand Up @@ -3717,19 +3717,24 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
int CorrectedNumVAddrs = NumVAddrs;

// Optimize _L to _LZ when _L is zero
if (AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
if (const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
const ConstantFP *ConstantLod;
const int LodIdx = AddrIdx + NumVAddrs - 1;

// FIXME: This isn't the cleanest way to handle this, but it's the easiest
// option the current infrastructure gives. We really should be changing the
// base intrinsic opcode, but the current searchable tables only give us
// the final MI opcode. Eliminate the register here, and track with an
// immediate 0 so the final selection will know to do the opcode change.
if (mi_match(MI.getOperand(LodIdx).getReg(), *MRI, m_GFCst(ConstantLod))) {
if (ConstantLod->isZero() || ConstantLod->isNegative()) {
MI.getOperand(LodIdx).ChangeToImmediate(0);
// Set new opcode to _lz variant of _l, and change the intrinsic ID.
ImageDimIntr = AMDGPU::getImageDimInstrinsicByBaseOpcode(
LZMappingInfo->LZ, ImageDimIntr->Dim);

// The starting indexes should remain in the same place.
--NumVAddrs;
--CorrectedNumVAddrs;

MI.getOperand(MI.getNumExplicitDefs()).setIntrinsicID(
static_cast<Intrinsic::ID>(ImageDimIntr->Intr));
MI.RemoveOperand(LodIdx);
}
}
}
Expand All @@ -3741,6 +3746,8 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(

if (mi_match(MI.getOperand(LodIdx).getReg(), *MRI, m_ICst(ConstantLod))) {
if (ConstantLod == 0) {
// TODO: Change intrinsic opcode and remove operand instead of replacing
// it with 0, as the _L to _LZ handling is done above.
MI.getOperand(LodIdx).ChangeToImmediate(0);
--CorrectedNumVAddrs;
}
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/MIMGInstructions.td
Expand Up @@ -817,6 +817,11 @@ def ImageDimIntrinsicTable : GenericTable {
let PrimaryKeyEarlyOut = 1;
}

// Additional search index over ImageDimIntrinsicTable, keyed on the
// (BaseOpcode, Dim) pair rather than on the table's primary key. A C++ lookup
// function of the same name is declared in AMDGPUInstrInfo.h, so the def name
// (including its "Instrinsic" spelling) must stay in sync with that
// declaration.
def getImageDimInstrinsicByBaseOpcode : SearchIndex {
let Table = ImageDimIntrinsicTable;
let Key = ["BaseOpcode", "Dim"];
}

foreach intr = !listconcat(AMDGPUImageDimIntrinsics,
AMDGPUImageDimAtomicIntrinsics) in {
def : ImageDimIntrinsicInfo<intr>;
Expand Down
Expand Up @@ -6,40 +6,35 @@ define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg
; GFX9-LABEL: sample_l_1d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffff
; GFX9-NEXT: s_lshl_b32 s12, s0, 16
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: v_and_or_b32 v0, v0, v1, s12
; GFX9-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: sample_l_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: s_lshl_b32 s12, s0, 16
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: v_and_or_b32 v0, v0, 0xffff, s12
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
Expand Down

0 comments on commit 5e4e8d0

Please sign in to comment.