Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AMDGPU] Implemented dwordx3 variants of buffer/tbuffer load/store intrinsics
Now that we have vec3 MVTs, this commit implements dwordx3 variants of the buffer intrinsics. On gfx6, a dwordx3 buffer load intrinsic is implemented as a dwordx4 instruction, and a dwordx3 buffer store intrinsic is not supported. We need to support the dwordx3 load intrinsic because it is generated by subtarget-unaware code in InstCombine. Differential Revision: https://reviews.llvm.org/D58904 Change-Id: I016729d8557b98a52f529638ae97c340a5922a4e llvm-svn: 356755
- Loading branch information
Tim Renouf
committed
Mar 22, 2019
1 parent
f95351b
commit 677387d
Showing
11 changed files
with
259 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
60 changes: 60 additions & 0 deletions
60
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.dwordx3.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
;RUN: llc < %s -march=amdgcn -mcpu=gfx600 -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,SI | ||
;RUN: llc < %s -march=amdgcn -mcpu=gfx700 -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,GCNX3 | ||
|
||
; Legacy llvm.amdgcn.buffer.load.format returning <3 x float>; the constant 42 passed in the call is expected to appear as the instruction's offset. | ||
; The gfx600 run (SI prefix) expects the xyzw form writing v[0:3]; the gfx700 run (GCNX3 prefix) expects the xyz form writing v[0:2]. | ||
;CHECK-LABEL: {{^}}buffer_load_format_immoffs_x3: | ||
;SI: buffer_load_format_xyzw v[0:3], off, s[0:3], 0 offset:42 | ||
;GCNX3: buffer_load_format_xyz v[0:2], off, s[0:3], 0 offset:42 | ||
;CHECK: s_waitcnt | ||
define amdgpu_ps <3 x float> @buffer_load_format_immoffs_x3(<4 x i32> inreg) { | ||
main_body: | ||
%data = call <3 x float> @llvm.amdgcn.buffer.load.format.v3f32(<4 x i32> %0, i32 0, i32 42, i1 0, i1 0) | ||
ret <3 x float> %data | ||
} | ||
|
||
; Legacy llvm.amdgcn.buffer.load returning <3 x float>; the constant 40 passed in the call is expected to appear as the instruction's offset. | ||
; The gfx600 run (SI prefix) expects a dwordx4 load into v[0:3]; the gfx700 run (GCNX3 prefix) expects a dwordx3 load into v[0:2]. | ||
;CHECK-LABEL: {{^}}buffer_load_immoffs_x3: | ||
;SI: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:40 | ||
;GCNX3: buffer_load_dwordx3 v[0:2], off, s[0:3], 0 offset:40 | ||
;CHECK: s_waitcnt | ||
define amdgpu_ps <3 x float> @buffer_load_immoffs_x3(<4 x i32> inreg) { | ||
main_body: | ||
%data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %0, i32 0, i32 40, i1 0, i1 0) | ||
ret <3 x float> %data | ||
} | ||
|
||
; llvm.amdgcn.raw.buffer.load returning <3 x float>; the constant 40 (first i32 operand after the resource) is expected to appear as the instruction's offset. | ||
; The gfx600 run (SI prefix) expects a dwordx4 load into v[0:3]; the gfx700 run (GCNX3 prefix) expects a dwordx3 load into v[0:2]. | ||
;CHECK-LABEL: {{^}}buffer_raw_load_immoffs_x3: | ||
;SI: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:40 | ||
;GCNX3: buffer_load_dwordx3 v[0:2], off, s[0:3], 0 offset:40 | ||
;CHECK: s_waitcnt | ||
define amdgpu_ps <3 x float> @buffer_raw_load_immoffs_x3(<4 x i32> inreg) { | ||
main_body: | ||
%data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %0, i32 40, i32 0, i32 0) | ||
ret <3 x float> %data | ||
} | ||
|
||
; llvm.amdgcn.struct.buffer.load.format returning <3 x float>; the checks expect an idxen access through some VGPR with the constant 42 as the offset. | ||
; The gfx600 run (SI prefix) expects the xyzw form writing v[0:3]; the gfx700 run (GCNX3 prefix) expects the xyz form writing v[0:2]. | ||
;CHECK-LABEL: {{^}}buffer_struct_load_format_immoffs_x3: | ||
;SI: buffer_load_format_xyzw v[0:3], {{v[0-9]+}}, s[0:3], 0 idxen offset:42 | ||
;GCNX3: buffer_load_format_xyz v[0:2], {{v[0-9]+}}, s[0:3], 0 idxen offset:42 | ||
;CHECK: s_waitcnt | ||
define amdgpu_ps <3 x float> @buffer_struct_load_format_immoffs_x3(<4 x i32> inreg) { | ||
main_body: | ||
%data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %0, i32 0, i32 42, i32 0, i32 0) | ||
ret <3 x float> %data | ||
} | ||
|
||
; llvm.amdgcn.struct.buffer.load returning <3 x float>; the checks expect an idxen access through some VGPR with the constant 40 as the offset. | ||
; The gfx600 run (SI prefix) expects a dwordx4 load into v[0:3]; the gfx700 run (GCNX3 prefix) expects a dwordx3 load into v[0:2]. | ||
;CHECK-LABEL: {{^}}struct_buffer_load_immoffs_x3: | ||
;SI: buffer_load_dwordx4 v[0:3], {{v[0-9]+}}, s[0:3], 0 idxen offset:40 | ||
;GCNX3: buffer_load_dwordx3 v[0:2], {{v[0-9]+}}, s[0:3], 0 idxen offset:40 | ||
;CHECK: s_waitcnt | ||
define amdgpu_ps <3 x float> @struct_buffer_load_immoffs_x3(<4 x i32> inreg) { | ||
main_body: | ||
%data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %0, i32 0, i32 40, i32 0, i32 0) | ||
ret <3 x float> %data | ||
} | ||
|
||
; Declarations of the v3f32 buffer load intrinsics (legacy, raw and struct flavours, each with and without format). | ||
; NOTE(review): llvm.amdgcn.raw.buffer.load.format.v3f32 is declared here but no function visible in this file calls it - confirm whether a raw format load test was intended. | ||
; NOTE(review): attribute group #0 is referenced but no definition of it is visible in this file - confirm. | ||
declare <3 x float> @llvm.amdgcn.buffer.load.format.v3f32(<4 x i32>, i32, i32, i1, i1) #0 | ||
declare <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32>, i32, i32, i1, i1) #0 | ||
declare <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32) #0 | ||
declare <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32>, i32, i32, i32) #0 | ||
declare <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32, i32) #0 | ||
declare <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32>, i32, i32, i32, i32) #0 | ||
|
53 changes: 53 additions & 0 deletions
53
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.dwordx3.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefix=CHECK | ||
|
||
; Legacy llvm.amdgcn.buffer.store.format with a <3 x float> value; expects a buffer_store_format_xyz of v[0:2] using the constant 42 as the offset. | ||
; No s_waitcnt may appear between the function label and the store. | ||
;CHECK-LABEL: {{^}}buffer_store_format_immoffs_x3: | ||
;CHECK-NOT: s_waitcnt | ||
;CHECK: buffer_store_format_xyz v[0:2], off, s[0:3], 0 offset:42 | ||
define amdgpu_ps void @buffer_store_format_immoffs_x3(<4 x i32> inreg, <3 x float>) { | ||
main_body: | ||
call void @llvm.amdgcn.buffer.store.format.v3f32(<3 x float> %1, <4 x i32> %0, i32 0, i32 42, i1 0, i1 0) | ||
ret void | ||
} | ||
|
||
; Legacy llvm.amdgcn.buffer.store with a <3 x float> value; expects a buffer_store_dwordx3 of v[0:2] using the constant 42 as the offset. | ||
; No s_waitcnt may appear between the function label and the store. | ||
;CHECK-LABEL: {{^}}buffer_store_immoffs_x3: | ||
;CHECK-NOT: s_waitcnt | ||
;CHECK: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 offset:42 | ||
define amdgpu_ps void @buffer_store_immoffs_x3(<4 x i32> inreg, <3 x float>) { | ||
main_body: | ||
call void @llvm.amdgcn.buffer.store.v3f32(<3 x float> %1, <4 x i32> %0, i32 0, i32 42, i1 0, i1 0) | ||
ret void | ||
} | ||
|
||
; llvm.amdgcn.raw.buffer.store.format with a <3 x float> value; expects a buffer_store_format_xyz of v[0:2] using the constant 42 as the offset. | ||
; No s_waitcnt may appear between the function label and the store. | ||
;CHECK-LABEL: {{^}}raw_buffer_store_format_immoffs_x3: | ||
;CHECK-NOT: s_waitcnt | ||
;CHECK: buffer_store_format_xyz v[0:2], off, s[0:3], 0 offset:42 | ||
define amdgpu_ps void @raw_buffer_store_format_immoffs_x3(<4 x i32> inreg, <3 x float>) { | ||
main_body: | ||
call void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float> %1, <4 x i32> %0, i32 42, i32 0, i32 0) | ||
ret void | ||
} | ||
|
||
; llvm.amdgcn.raw.buffer.store with a <3 x float> value; expects a buffer_store_dwordx3 of v[0:2] using the constant 42 as the offset. | ||
; No s_waitcnt may appear between the function label and the store. | ||
;CHECK-LABEL: {{^}}raw_buffer_store_immoffs_x3: | ||
;CHECK-NOT: s_waitcnt | ||
;CHECK: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 offset:42 | ||
define amdgpu_ps void @raw_buffer_store_immoffs_x3(<4 x i32> inreg, <3 x float>) { | ||
main_body: | ||
call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %1, <4 x i32> %0, i32 42, i32 0, i32 0) | ||
ret void | ||
} | ||
|
||
; llvm.amdgcn.struct.buffer.store with a <3 x float> value; expects an idxen buffer_store_dwordx3 of v[0:2] through some VGPR, using the constant 42 as the offset. | ||
; No s_waitcnt may appear between the function label and the store. | ||
;CHECK-LABEL: {{^}}struct_buffer_store_immoffs_x3: | ||
;CHECK-NOT: s_waitcnt | ||
;CHECK: buffer_store_dwordx3 v[0:2], {{v[0-9]+}}, s[0:3], 0 idxen offset:42 | ||
define amdgpu_ps void @struct_buffer_store_immoffs_x3(<4 x i32> inreg, <3 x float>) { | ||
main_body: | ||
call void @llvm.amdgcn.struct.buffer.store.v3f32(<3 x float> %1, <4 x i32> %0, i32 0, i32 42, i32 0, i32 0) | ||
ret void | ||
} | ||
|
||
; Declarations of the v3f32 buffer store intrinsics (legacy, raw and struct flavours, each with and without format). | ||
; NOTE(review): llvm.amdgcn.struct.buffer.store.format.v3f32 is declared here but no function visible in this file calls it - confirm whether a struct format store test was intended. | ||
; NOTE(review): attribute group #0 is referenced but no definition of it is visible in this file - confirm. | ||
declare void @llvm.amdgcn.buffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i1, i1) #0 | ||
declare void @llvm.amdgcn.buffer.store.format.v3f32(<3 x float>, <4 x i32>, i32, i32, i1, i1) #0 | ||
declare void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float>, <4 x i32>, i32, i32, i32) #0 | ||
declare void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32) #0 | ||
declare void @llvm.amdgcn.struct.buffer.store.format.v3f32(<3 x float>, <4 x i32>, i32, i32, i32, i32) #0 | ||
declare void @llvm.amdgcn.struct.buffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32, i32) #0 |
Oops, something went wrong.