Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AMDGPU] support image load/store a16
Our a16 support was only enabled for sample/gather and buffer load/store, but not for image load/store operations (which take an i16 as the pixel index rather than a half). Fix our isel lowering and add test cases to prove it out. Differential Revision: https://reviews.llvm.org/D53750 llvm-svn: 345710
- Loading branch information
Neil Henning
committed
Oct 31, 2018
1 parent
262baa4
commit 63718b2
Showing
7 changed files
with
1,146 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
128 changes: 128 additions & 0 deletions
128
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.d16.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s | ||
|
||
; 1D a16 load returning <4 x half>: the x coordinate is lane 0 of %coords and | ||
; the first intrinsic operand (i32 1) pairs with the dmask:0x1 expectation. | ||
; GCN-LABEL: {{^}}load.f16.1d: | ||
; GCN: image_load v[0:1], v0, s[0:7] dmask:0x1 unorm a16 d16 | ||
define amdgpu_ps <4 x half> @load.f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords, i32 0 | ||
%texel = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32 1, i16 %coord.x, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x half> %texel | ||
} | ||
|
||
; 1D a16 load returning <4 x half>: the x coordinate is lane 0 of %coords and | ||
; the first intrinsic operand (i32 3) pairs with the dmask:0x3 expectation. | ||
; GCN-LABEL: {{^}}load.v2f16.1d: | ||
; GCN: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm a16 d16 | ||
define amdgpu_ps <4 x half> @load.v2f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords, i32 0 | ||
%texel = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32 3, i16 %coord.x, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x half> %texel | ||
} | ||
|
||
; 1D a16 load returning <4 x half>: the x coordinate is lane 0 of %coords and | ||
; the first intrinsic operand (i32 7) pairs with the dmask:0x7 expectation. | ||
; GCN-LABEL: {{^}}load.v3f16.1d: | ||
; GCN: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm a16 d16 | ||
define amdgpu_ps <4 x half> @load.v3f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords, i32 0 | ||
%texel = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32 7, i16 %coord.x, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x half> %texel | ||
} | ||
|
||
; 1D a16 load returning <4 x half>: the x coordinate is lane 0 of %coords and | ||
; the first intrinsic operand (i32 15) pairs with the dmask:0xf expectation. | ||
; GCN-LABEL: {{^}}load.v4f16.1d: | ||
; GCN: image_load v[0:1], v0, s[0:7] dmask:0xf unorm a16 d16 | ||
define amdgpu_ps <4 x half> @load.v4f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords, i32 0 | ||
%texel = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32 15, i16 %coord.x, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x half> %texel | ||
} | ||
|
||
; 2D a16 load returning <4 x half>: x and y are lanes 0 and 1 of %coords and | ||
; the first intrinsic operand (i32 1) pairs with the dmask:0x1 expectation. | ||
; GCN-LABEL: {{^}}load.f16.2d: | ||
; GCN: image_load v[0:1], v0, s[0:7] dmask:0x1 unorm a16 d16 | ||
define amdgpu_ps <4 x half> @load.f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords, i32 0 | ||
%coord.y = extractelement <2 x i16> %coords, i32 1 | ||
%texel = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32 1, i16 %coord.x, i16 %coord.y, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x half> %texel | ||
} | ||
|
||
; 2D a16 load returning <4 x half>: x and y are lanes 0 and 1 of %coords and | ||
; the first intrinsic operand (i32 3) pairs with the dmask:0x3 expectation. | ||
; GCN-LABEL: {{^}}load.v2f16.2d: | ||
; GCN: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm a16 d16 | ||
define amdgpu_ps <4 x half> @load.v2f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords, i32 0 | ||
%coord.y = extractelement <2 x i16> %coords, i32 1 | ||
%texel = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32 3, i16 %coord.x, i16 %coord.y, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x half> %texel | ||
} | ||
|
||
; 2D a16 load returning <4 x half>: x and y are lanes 0 and 1 of %coords and | ||
; the first intrinsic operand (i32 7) pairs with the dmask:0x7 expectation. | ||
; GCN-LABEL: {{^}}load.v3f16.2d: | ||
; GCN: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm a16 d16 | ||
define amdgpu_ps <4 x half> @load.v3f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords, i32 0 | ||
%coord.y = extractelement <2 x i16> %coords, i32 1 | ||
%texel = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32 7, i16 %coord.x, i16 %coord.y, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x half> %texel | ||
} | ||
|
||
; 2D a16 load returning <4 x half>: x and y are lanes 0 and 1 of %coords and | ||
; the first intrinsic operand (i32 15) pairs with the dmask:0xf expectation. | ||
; GCN-LABEL: {{^}}load.v4f16.2d: | ||
; GCN: image_load v[0:1], v0, s[0:7] dmask:0xf unorm a16 d16 | ||
define amdgpu_ps <4 x half> @load.v4f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords, i32 0 | ||
%coord.y = extractelement <2 x i16> %coords, i32 1 | ||
%texel = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32 15, i16 %coord.x, i16 %coord.y, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x half> %texel | ||
} | ||
|
||
; 3D a16 load returning <4 x half>: x and y are lanes 0 and 1 of %coords_lo, | ||
; z is lane 0 of %coords_hi; the first intrinsic operand (i32 1) pairs with | ||
; the dmask:0x1 expectation. | ||
; GCN-LABEL: {{^}}load.f16.3d: | ||
; GCN: image_load v[0:1], v[0:1], s[0:7] dmask:0x1 unorm a16 d16 | ||
define amdgpu_ps <4 x half> @load.f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords_lo, i32 0 | ||
%coord.y = extractelement <2 x i16> %coords_lo, i32 1 | ||
%coord.z = extractelement <2 x i16> %coords_hi, i32 0 | ||
%texel = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32 1, i16 %coord.x, i16 %coord.y, i16 %coord.z, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x half> %texel | ||
} | ||
|
||
; 3D a16 load returning <4 x half>: x and y are lanes 0 and 1 of %coords_lo, | ||
; z is lane 0 of %coords_hi; the first intrinsic operand (i32 3) pairs with | ||
; the dmask:0x3 expectation. | ||
; GCN-LABEL: {{^}}load.v2f16.3d: | ||
; GCN: image_load v[0:1], v[0:1], s[0:7] dmask:0x3 unorm a16 d16 | ||
define amdgpu_ps <4 x half> @load.v2f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords_lo, i32 0 | ||
%coord.y = extractelement <2 x i16> %coords_lo, i32 1 | ||
%coord.z = extractelement <2 x i16> %coords_hi, i32 0 | ||
%texel = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32 3, i16 %coord.x, i16 %coord.y, i16 %coord.z, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x half> %texel | ||
} | ||
|
||
; 3D a16 load returning <4 x half>: x and y are lanes 0 and 1 of %coords_lo, | ||
; z is lane 0 of %coords_hi; the first intrinsic operand (i32 7) pairs with | ||
; the dmask:0x7 expectation. | ||
; GCN-LABEL: {{^}}load.v3f16.3d: | ||
; GCN: image_load v[0:1], v[0:1], s[0:7] dmask:0x7 unorm a16 d16 | ||
define amdgpu_ps <4 x half> @load.v3f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords_lo, i32 0 | ||
%coord.y = extractelement <2 x i16> %coords_lo, i32 1 | ||
%coord.z = extractelement <2 x i16> %coords_hi, i32 0 | ||
%texel = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32 7, i16 %coord.x, i16 %coord.y, i16 %coord.z, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x half> %texel | ||
} | ||
|
||
; 3D a16 load returning <4 x half>: x and y are lanes 0 and 1 of %coords_lo, | ||
; z is lane 0 of %coords_hi; the first intrinsic operand (i32 15) pairs with | ||
; the dmask:0xf expectation. | ||
; GCN-LABEL: {{^}}load.v4f16.3d: | ||
; GCN: image_load v[0:1], v[0:1], s[0:7] dmask:0xf unorm a16 d16 | ||
define amdgpu_ps <4 x half> @load.v4f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords_lo, i32 0 | ||
%coord.y = extractelement <2 x i16> %coords_lo, i32 1 | ||
%coord.z = extractelement <2 x i16> %coords_hi, i32 0 | ||
%texel = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32 15, i16 %coord.x, i16 %coord.y, i16 %coord.z, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x half> %texel | ||
} | ||
|
||
; Declarations of the i16-coordinate image-load intrinsics called by the tests | ||
; above. They are tagged with attribute group #1 (nounwind readonly), which is | ||
; defined at the end of this file. | ||
declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32, i16, <8 x i32>, i32, i32) #1 | ||
declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32, i16, i16, <8 x i32>, i32, i32) #1 | ||
declare <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1 | ||
|
||
; Attribute groups. #1 is the group attached to the declarations above; #0 is | ||
; retained unchanged although nothing visible in this file refers to it. | ||
attributes #0 = { nounwind } | ||
attributes #1 = { nounwind readonly } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s | ||
|
||
; 1D a16 load returning <4 x float>: the x coordinate is lane 0 of %coords and | ||
; the first intrinsic operand (i32 1) pairs with the dmask:0x1 expectation. | ||
; GCN-LABEL: {{^}}load.f32.1d: | ||
; GCN: image_load v[0:3], v0, s[0:7] dmask:0x1 unorm a16 | ||
define amdgpu_ps <4 x float> @load.f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords, i32 0 | ||
%texel = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 1, i16 %coord.x, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x float> %texel | ||
} | ||
|
||
; 1D a16 load returning <4 x float>: the x coordinate is lane 0 of %coords and | ||
; the first intrinsic operand (i32 3) pairs with the dmask:0x3 expectation. | ||
; GCN-LABEL: {{^}}load.v2f32.1d: | ||
; GCN: image_load v[0:3], v0, s[0:7] dmask:0x3 unorm a16 | ||
define amdgpu_ps <4 x float> @load.v2f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords, i32 0 | ||
%texel = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 3, i16 %coord.x, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x float> %texel | ||
} | ||
|
||
; 1D a16 load returning <4 x float>: the x coordinate is lane 0 of %coords and | ||
; the first intrinsic operand (i32 7) pairs with the dmask:0x7 expectation. | ||
; GCN-LABEL: {{^}}load.v3f32.1d: | ||
; GCN: image_load v[0:3], v0, s[0:7] dmask:0x7 unorm a16 | ||
define amdgpu_ps <4 x float> @load.v3f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords, i32 0 | ||
%texel = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 7, i16 %coord.x, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x float> %texel | ||
} | ||
|
||
; 1D a16 load returning <4 x float>: the x coordinate is lane 0 of %coords and | ||
; the first intrinsic operand (i32 15) pairs with the dmask:0xf expectation. | ||
; GCN-LABEL: {{^}}load.v4f32.1d: | ||
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16 | ||
define amdgpu_ps <4 x float> @load.v4f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords, i32 0 | ||
%texel = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %coord.x, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x float> %texel | ||
} | ||
|
||
; 2D a16 load returning <4 x float>: x and y are lanes 0 and 1 of %coords and | ||
; the first intrinsic operand (i32 1) pairs with the dmask:0x1 expectation. | ||
; GCN-LABEL: {{^}}load.f32.2d: | ||
; GCN: image_load v[0:3], v0, s[0:7] dmask:0x1 unorm a16 | ||
define amdgpu_ps <4 x float> @load.f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords, i32 0 | ||
%coord.y = extractelement <2 x i16> %coords, i32 1 | ||
%texel = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 1, i16 %coord.x, i16 %coord.y, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x float> %texel | ||
} | ||
|
||
; 2D a16 load returning <4 x float>: x and y are lanes 0 and 1 of %coords and | ||
; the first intrinsic operand (i32 3) pairs with the dmask:0x3 expectation. | ||
; GCN-LABEL: {{^}}load.v2f32.2d: | ||
; GCN: image_load v[0:3], v0, s[0:7] dmask:0x3 unorm a16 | ||
define amdgpu_ps <4 x float> @load.v2f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords, i32 0 | ||
%coord.y = extractelement <2 x i16> %coords, i32 1 | ||
%texel = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 3, i16 %coord.x, i16 %coord.y, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x float> %texel | ||
} | ||
|
||
; 2D a16 load returning <4 x float>: x and y are lanes 0 and 1 of %coords and | ||
; the first intrinsic operand (i32 7) pairs with the dmask:0x7 expectation. | ||
; GCN-LABEL: {{^}}load.v3f32.2d: | ||
; GCN: image_load v[0:3], v0, s[0:7] dmask:0x7 unorm a16 | ||
define amdgpu_ps <4 x float> @load.v3f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords, i32 0 | ||
%coord.y = extractelement <2 x i16> %coords, i32 1 | ||
%texel = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 7, i16 %coord.x, i16 %coord.y, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x float> %texel | ||
} | ||
|
||
; 2D a16 load returning <4 x float>: x and y are lanes 0 and 1 of %coords and | ||
; the first intrinsic operand (i32 15) pairs with the dmask:0xf expectation. | ||
; GCN-LABEL: {{^}}load.v4f32.2d: | ||
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16 | ||
define amdgpu_ps <4 x float> @load.v4f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords, i32 0 | ||
%coord.y = extractelement <2 x i16> %coords, i32 1 | ||
%texel = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 %coord.x, i16 %coord.y, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x float> %texel | ||
} | ||
|
||
; 3D a16 load returning <4 x float>: x and y are lanes 0 and 1 of %coords_lo, | ||
; z is lane 0 of %coords_hi; the first intrinsic operand (i32 1) pairs with | ||
; the dmask:0x1 expectation. | ||
; GCN-LABEL: {{^}}load.f32.3d: | ||
; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0x1 unorm a16 | ||
define amdgpu_ps <4 x float> @load.f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords_lo, i32 0 | ||
%coord.y = extractelement <2 x i16> %coords_lo, i32 1 | ||
%coord.z = extractelement <2 x i16> %coords_hi, i32 0 | ||
%texel = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 1, i16 %coord.x, i16 %coord.y, i16 %coord.z, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x float> %texel | ||
} | ||
|
||
; 3D a16 load returning <4 x float>: x and y are lanes 0 and 1 of %coords_lo, | ||
; z is lane 0 of %coords_hi; the first intrinsic operand (i32 3) pairs with | ||
; the dmask:0x3 expectation. | ||
; GCN-LABEL: {{^}}load.v2f32.3d: | ||
; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0x3 unorm a16 | ||
define amdgpu_ps <4 x float> @load.v2f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords_lo, i32 0 | ||
%coord.y = extractelement <2 x i16> %coords_lo, i32 1 | ||
%coord.z = extractelement <2 x i16> %coords_hi, i32 0 | ||
%texel = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 3, i16 %coord.x, i16 %coord.y, i16 %coord.z, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x float> %texel | ||
} | ||
|
||
; 3D a16 load returning <4 x float>: x and y are lanes 0 and 1 of %coords_lo, | ||
; z is lane 0 of %coords_hi; the first intrinsic operand (i32 7) pairs with | ||
; the dmask:0x7 expectation. | ||
; GCN-LABEL: {{^}}load.v3f32.3d: | ||
; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0x7 unorm a16 | ||
define amdgpu_ps <4 x float> @load.v3f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords_lo, i32 0 | ||
%coord.y = extractelement <2 x i16> %coords_lo, i32 1 | ||
%coord.z = extractelement <2 x i16> %coords_hi, i32 0 | ||
%texel = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 7, i16 %coord.x, i16 %coord.y, i16 %coord.z, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x float> %texel | ||
} | ||
|
||
; 3D a16 load returning <4 x float>: x and y are lanes 0 and 1 of %coords_lo, | ||
; z is lane 0 of %coords_hi; the first intrinsic operand (i32 15) pairs with | ||
; the dmask:0xf expectation. | ||
; GCN-LABEL: {{^}}load.v4f32.3d: | ||
; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 | ||
define amdgpu_ps <4 x float> @load.v4f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) { | ||
entry: | ||
%coord.x = extractelement <2 x i16> %coords_lo, i32 0 | ||
%coord.y = extractelement <2 x i16> %coords_lo, i32 1 | ||
%coord.z = extractelement <2 x i16> %coords_hi, i32 0 | ||
%texel = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 15, i16 %coord.x, i16 %coord.y, i16 %coord.z, <8 x i32> %rsrc, i32 0, i32 0) | ||
ret <4 x float> %texel | ||
} | ||
|
||
; Declarations of the i16-coordinate image-load intrinsics called by the tests | ||
; above. They are tagged with attribute group #1 (nounwind readonly), which is | ||
; defined at the end of this file. | ||
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #1 | ||
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1 | ||
declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1 | ||
|
||
; Attribute groups. #1 is the group attached to the declarations above; #0 is | ||
; retained unchanged although nothing visible in this file refers to it. | ||
attributes #0 = { nounwind } | ||
attributes #1 = { nounwind readonly } |
Oops, something went wrong.