Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AMDGPU] gfx11 ldsdir intrinsics and ISel
Reviewed By: #amdgpu, rampitec Differential Revision: https://reviews.llvm.org/D127664
- Loading branch information
Showing
8 changed files
with
207 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
36 changes: 36 additions & 0 deletions
36
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.lds.direct.load.mir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py | ||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s | ||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s | ||
|
||
--- | ||
# SGPR-operand case for llvm.amdgcn.lds.direct.load: %0 is copied from $sgpr0.
# The expected regbankselect output keeps that copy on the sgpr bank, feeds it
# to the intrinsic unchanged, and places the intrinsic result on the vgpr bank.
name: lds_direct_load_s | ||
legalized: true | ||
tracksRegLiveness: true | ||
|
||
body: | | ||
bb.0: | ||
liveins: $sgpr0 | ||
; CHECK-LABEL: name: lds_direct_load_s | ||
; CHECK: liveins: $sgpr0 | ||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 | ||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.lds.direct.load), [[COPY]](s32) | ||
%0:_(s32) = COPY $sgpr0 | ||
%1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.lds.direct.load), %0 | ||
... | ||
|
||
--- | ||
# VGPR-operand case for llvm.amdgcn.lds.direct.load: %0 is copied from $vgpr0.
# The expected regbankselect output passes the copy through V_READFIRSTLANE_B32
# (producing an sreg_32 value) and gives that value to the intrinsic; the
# intrinsic result is placed on the vgpr bank.
name: lds_direct_load_v | ||
legalized: true | ||
tracksRegLiveness: true | ||
|
||
body: | | ||
bb.0: | ||
liveins: $vgpr0 | ||
; CHECK-LABEL: name: lds_direct_load_v | ||
; CHECK: liveins: $vgpr0 | ||
; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 | ||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec | ||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.lds.direct.load), [[V_READFIRSTLANE_B32_]](s32) | ||
%0:_(s32) = COPY $vgpr0 | ||
%1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.lds.direct.load), %0 | ||
... |
36 changes: 36 additions & 0 deletions
36
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.lds.param.load.mir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py | ||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s | ||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s | ||
|
||
--- | ||
# SGPR-operand case for llvm.amdgcn.lds.param.load: the intrinsic takes two
# immediate operands (1, 1) followed by %0, which is copied from $sgpr0.
# The expected regbankselect output keeps the copy on the sgpr bank, feeds it
# to the intrinsic unchanged, and places the intrinsic result on the vgpr bank.
name: lds_param_load_s | ||
legalized: true | ||
tracksRegLiveness: true | ||
|
||
body: | | ||
bb.0: | ||
liveins: $sgpr0 | ||
; CHECK-LABEL: name: lds_param_load_s | ||
; CHECK: liveins: $sgpr0 | ||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 | ||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.lds.param.load), 1, 1, [[COPY]](s32) | ||
%0:_(s32) = COPY $sgpr0 | ||
%1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.lds.param.load), 1, 1, %0 | ||
... | ||
|
||
--- | ||
# VGPR-operand case for llvm.amdgcn.lds.param.load: the intrinsic takes two
# immediate operands (1, 1) followed by %0, which is copied from $vgpr0.
# The expected regbankselect output passes the copy through V_READFIRSTLANE_B32
# (producing an sreg_32 value) and gives that value to the intrinsic; the
# intrinsic result is placed on the vgpr bank.
name: lds_param_load_v | ||
legalized: true | ||
tracksRegLiveness: true | ||
|
||
body: | | ||
bb.0: | ||
liveins: $vgpr0 | ||
; CHECK-LABEL: name: lds_param_load_v | ||
; CHECK: liveins: $vgpr0 | ||
; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 | ||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec | ||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.lds.param.load), 1, 1, [[V_READFIRSTLANE_B32_]](s32) | ||
%0:_(s32) = COPY $vgpr0 | ||
%1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.lds.param.load), 1, 1, %0 | ||
... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s | ||
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s | ||
|
||
; GFX11-LABEL: {{^}}lds_direct_load: | ||
; GFX11: s_mov_b32 m0 | ||
; GFX11: lds_direct_load v{{[0-9]+}} | ||
; GFX11: s_mov_b32 m0 | ||
; GFX11: lds_direct_load v{{[0-9]+}} | ||
; GFX11: s_mov_b32 m0 | ||
; GFX11: lds_direct_load v{{[0-9]+}} | ||
; GFX11: v_add_f32 | ||
; GFX11: buffer_store_b32 | ||
; GFX11: buffer_store_b32 | ||
; GFX11: buffer_store_b32 | ||
; GFX11: buffer_store_b32 | ||
; GFX11: buffer_store_b32 | ||
; GFX11: buffer_store_b32 | ||
; amdgpu_ps entry point that calls llvm.amdgcn.lds.direct.load five times:
; once with %arg0 and twice each with %arg1 and %arg2. It then stores %p0 + 1.0
; followed by %p1..%p4 and %p0 to %buf through six raw buffer stores.
define amdgpu_ps void @lds_direct_load(<4 x i32> inreg %buf, i32 inreg %arg0, | ||
i32 inreg %arg1, i32 inreg %arg2) #0 { | ||
main_body: | ||
%p0 = call float @llvm.amdgcn.lds.direct.load(i32 %arg0) | ||
; Ensure memory clustering is occurring for lds_direct_load | ||
%p5 = fadd float %p0, 1.0 | ||
%p1 = call float @llvm.amdgcn.lds.direct.load(i32 %arg1) | ||
%p2 = call float @llvm.amdgcn.lds.direct.load(i32 %arg2) | ||
%p3 = call float @llvm.amdgcn.lds.direct.load(i32 %arg1) | ||
%p4 = call float @llvm.amdgcn.lds.direct.load(i32 %arg2) | ||
call void @llvm.amdgcn.raw.buffer.store.f32(float %p5, <4 x i32> %buf, i32 4, i32 0, i32 0) | ||
call void @llvm.amdgcn.raw.buffer.store.f32(float %p1, <4 x i32> %buf, i32 4, i32 1, i32 0) | ||
call void @llvm.amdgcn.raw.buffer.store.f32(float %p2, <4 x i32> %buf, i32 4, i32 2, i32 0) | ||
call void @llvm.amdgcn.raw.buffer.store.f32(float %p3, <4 x i32> %buf, i32 4, i32 3, i32 0) | ||
call void @llvm.amdgcn.raw.buffer.store.f32(float %p4, <4 x i32> %buf, i32 4, i32 4, i32 0) | ||
call void @llvm.amdgcn.raw.buffer.store.f32(float %p0, <4 x i32> %buf, i32 4, i32 5, i32 0) | ||
ret void | ||
} | ||
|
||
declare float @llvm.amdgcn.lds.direct.load(i32) #1 | ||
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32) | ||
|
||
attributes #0 = { nounwind } | ||
attributes #1 = { nounwind readonly } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s | ||
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s | ||
|
||
; GFX11-LABEL: {{^}}lds_param_load: | ||
; GFX11: s_mov_b32 m0 | ||
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.x | ||
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.y | ||
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.z | ||
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr0.w | ||
; GFX11-DAG: lds_param_load v{{[0-9]+}}, attr1.x | ||
; GFX11: v_add_f32 | ||
; GFX11: buffer_store_b32 | ||
; GFX11: buffer_store_b32 | ||
; GFX11: buffer_store_b32 | ||
; GFX11: buffer_store_b32 | ||
; GFX11: buffer_store_b32 | ||
; GFX11: buffer_store_b32 | ||
; amdgpu_ps entry point that calls llvm.amdgcn.lds.param.load five times with
; the same %arg but different leading immediates: (0,0), (1,0), (2,0), (3,0)
; and (0,1). It then stores %p0 + 1.0 followed by %p1..%p4 and %p0 to %buf
; through six raw buffer stores.
define amdgpu_ps void @lds_param_load(<4 x i32> inreg %buf, i32 inreg %arg) #0 { | ||
main_body: | ||
%p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %arg) | ||
; Ensure memory clustering is occurring for lds_param_load | ||
%p5 = fadd float %p0, 1.0 | ||
%p1 = call float @llvm.amdgcn.lds.param.load(i32 1, i32 0, i32 %arg) | ||
%p2 = call float @llvm.amdgcn.lds.param.load(i32 2, i32 0, i32 %arg) | ||
%p3 = call float @llvm.amdgcn.lds.param.load(i32 3, i32 0, i32 %arg) | ||
%p4 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %arg) | ||
call void @llvm.amdgcn.raw.buffer.store.f32(float %p5, <4 x i32> %buf, i32 4, i32 0, i32 0) | ||
call void @llvm.amdgcn.raw.buffer.store.f32(float %p1, <4 x i32> %buf, i32 4, i32 1, i32 0) | ||
call void @llvm.amdgcn.raw.buffer.store.f32(float %p2, <4 x i32> %buf, i32 4, i32 2, i32 0) | ||
call void @llvm.amdgcn.raw.buffer.store.f32(float %p3, <4 x i32> %buf, i32 4, i32 3, i32 0) | ||
call void @llvm.amdgcn.raw.buffer.store.f32(float %p4, <4 x i32> %buf, i32 4, i32 4, i32 0) | ||
call void @llvm.amdgcn.raw.buffer.store.f32(float %p0, <4 x i32> %buf, i32 4, i32 5, i32 0) | ||
ret void | ||
} | ||
|
||
declare float @llvm.amdgcn.lds.param.load(i32, i32, i32) #1 | ||
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32) | ||
|
||
attributes #0 = { nounwind } | ||
attributes #1 = { nounwind readnone } |