-
Notifications
You must be signed in to change notification settings - Fork 10.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AMDGPU] SI Load Store Optimizer: When merging with offset, use V_ADD_{I|U}32_e64.

- Change the inserted add (V_ADD_{I|U}32_e32) to the _e64 version (V_ADD_{I|U}32_e64) so that the add uses a vreg for the carry; this prevents the inserted v_add from killing VCC. The _e64 version doesn't accept a literal in its encoding, so we also need to introduce a mov instruction to get the immediate into a register.
- Change the pass name to "SI Load Store Optimizer"; this removes the '/', which complicates scripts.

Differential Revision: https://reviews.llvm.org/D42124

llvm-svn: 323153
- Loading branch information
Showing
3 changed files
with
100 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# RUN: llc -march=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,VI %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s

# If there's a base offset, check that SILoadStoreOptimizer creates
# V_ADD_{I|U}32_e64 for that offset; _e64 uses a vreg for the carry (rather than
# %vcc, which is used in _e32); this ensures that %vcc is not inadvertently
# clobbered.

# GCN-LABEL: name: kernel

# VI: V_ADD_I32_e64 %6, %0,
# VI-NEXT: DS_WRITE2_B32 killed %7, %0, %3, 0, 8,
# VI: V_ADD_I32_e64 %10, %3,
# VI-NEXT: DS_READ2_B32 killed %11, 0, 8,

# GFX9: V_ADD_U32_e64 %6, %0,
# GFX9-NEXT: DS_WRITE2_B32_gfx9 killed %7, %0, %3, 0, 8,
# GFX9: V_ADD_U32_e64 %9, %3,
# GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 0, 8,

--- |
  @0 = internal unnamed_addr addrspace(3) global [256 x float] undef, align 4

  define amdgpu_kernel void @kernel() {
  bb.0:
    br label %bb2

  bb1:
    ret void

  bb2:
    %tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0
    %tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8
    %tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16
    %tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24
    br label %bb1
  }
---
name: kernel
body: |
  bb.0:
    %0:vgpr_32 = IMPLICIT_DEF
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM

  bb.2:
    ; Make %vcc live across the DS merge points so a clobber would be caught
    ; by -verify-machineinstrs: the _e32 add would kill %vcc here.
    %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0, 0, implicit %exec
    %2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %1, implicit %exec
    V_CMP_NE_U32_e32 1, %2, implicit-def %vcc, implicit %exec
    ; Two DS_WRITE_B32 with offsets 1024/1056 (delta 32 = 8 dwords) merge
    ; into DS_WRITE2_B32 with a base-offset add of 1024.
    DS_WRITE_B32 %0, %0, 1024, 0, implicit %m0, implicit %exec :: (store 4 into %ir.tmp)
    %3:vgpr_32 = V_MOV_B32_e32 0, implicit %exec
    DS_WRITE_B32 %0, %3, 1056, 0, implicit %m0, implicit %exec :: (store 4 into %ir.tmp1)
    ; Likewise the two DS_READ_B32 at 1088/1120 merge into DS_READ2_B32.
    %4:vgpr_32 = DS_READ_B32 %3, 1088, 0, implicit %m0, implicit %exec :: (load 4 from %ir.tmp2)
    %5:vgpr_32 = DS_READ_B32 %3, 1120, 0, implicit %m0, implicit %exec :: (load 4 from %ir.tmp3)
    %vcc = S_AND_B64 %exec, %vcc, implicit-def %scc
    S_CBRANCH_VCCNZ %bb.1, implicit %vcc
    S_BRANCH %bb.1
...