Skip to content

Commit

Permalink
AMDGPU/SI: Don't mark VINTRP instructions as mayLoad
Browse files Browse the repository at this point in the history
Summary:
These instructions technically do read from memory, but the memory
is considered to be out of bounds for normal load/store instructions.

shader-db stats:

SGPRS: 1416075 -> 1413323 (-0.19 %)
VGPRS: 867413 -> 863935 (-0.40 %)
Spilled SGPRs: 1409 -> 1354 (-3.90 %)
Spilled VGPRs: 63 -> 63 (0.00 %)
Private memory VGPRs: 880 -> 880 (0.00 %)
Scratch size: 2648 -> 2632 (-0.60 %) dwords per thread
Code Size: 37889052 -> 37897340 (0.02 %) bytes
LDS: 2147 -> 2147 (0.00 %) blocks
Max Waves: 279243 -> 280369 (0.40 %)
Wait states: 0 -> 0 (0.00 %)

Reviewers: nhaehnle, mareko, arsenm

Subscribers: kzhuravl, wdng, yaxunl, tony-tye

Differential Revision: https://reviews.llvm.org/D27593

llvm-svn: 289219
  • Loading branch information
tstellarAMD committed Dec 9, 2016
1 parent bf9c0e7 commit 2a48433
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 6 deletions.
14 changes: 13 additions & 1 deletion llvm/lib/Target/AMDGPU/SIInstrFormats.td
Expand Up @@ -229,7 +229,19 @@ let Uses = [EXEC] in {

// Common base class for VINTRP instruction definitions. It forwards its
// outs/ins operand lists, asm string, and pattern list unchanged to InstSI
// and fixes the memory/side-effect flags shared by all VINTRP instructions.
class VINTRPCommon <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {
// NOTE(review): this view shows the diff hunk without +/- markers. The line
// below appears to be the single deletion reported for this hunk, i.e. the
// pre-change setting that is superseded by `let mayLoad = 0;` further down.
// Confirm against the repository before treating it as live code.
let mayLoad = 1;

// VINTRP instructions read parameter values from LDS, but these parameter
// values are stored outside of the LDS memory that is allocated to the
// shader for general purpose use.
//
// While it may be possible for ds_read/ds_write instructions to access
// the parameter values in LDS, this would essentially be an out-of-bounds
// memory access which we consider to be undefined behavior.
//
// So even though these instructions read memory, this memory is outside the
// addressable memory space for the shader, and we consider these instructions
// to be readnone.
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
}
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll
Expand Up @@ -5,10 +5,10 @@

;GCN-LABEL: {{^}}main:
;GCN-NOT: s_wqm
;GCN: s_mov_b32
;GCN-NEXT: v_interp_mov_f32
;GCN: v_interp_p1_f32
;GCN: v_interp_p2_f32
;GCN: s_mov_b32 m0
;GCN-DAG: v_interp_mov_f32
;GCN-DAG: v_interp_p1_f32
;GCN-DAG: v_interp_p2_f32

define amdgpu_ps void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) {
main_body:
Expand Down
16 changes: 15 additions & 1 deletion llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
@@ -1,5 +1,5 @@
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=GCN,VI %s

;GCN-LABEL: {{^}}v_interp:
;GCN-NOT: s_wqm
Expand All @@ -20,6 +20,20 @@ main_body:
ret void
}

; Two stores of 0.0 to LDS (at %lds and at %lds + 4 floats) are separated by
; a call to llvm.amdgcn.interp.mov. The check lines below expect both stores
; to be emitted as a single ds_write2_b32, so the interp call must not keep
; the two stores from being combined.
;
; SI won't merge ds memory operations because of the signed offset bug, so
; we only have check lines for VI.
; VI-LABEL: {{^}}v_interp_readnone:
; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
define amdgpu_ps void @v_interp_readnone(float addrspace(3)* %lds) {
  ; First LDS store, at the base pointer.
  store float 0.0, float addrspace(3)* %lds
  ; Interpolation read placed between the two stores.
  %tmp1 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 0)
  ; Second LDS store, four floats past the base pointer (matches offset1:4).
  %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
  store float 0.0, float addrspace(3)* %tmp2
  ; Export the interpolated value so the interp call has a use.
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp1, float %tmp1, float %tmp1, float %tmp1)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0

Expand Down

0 comments on commit 2a48433

Please sign in to comment.