159 changes: 159 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-amdgcn.workitem.id.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -run-pass=legalizer -o - %s | FileCheck -check-prefix=GCN %s

# Embedded LLVM IR module. Each function is an empty stub (just `ret void`)
# whose only purpose is to carry the attributes/metadata that the MIR
# functions of the same name below are legalized against.
#
# The six test_workitem_id_* kernels all attach !reqd_work_group_size !0,
# and !0 is {256, 8, 4} (x, y, z). missing_arg_info has no
# !reqd_work_group_size and instead carries the string attribute
# "amdgpu-no-workitem-id-x".
#
# NOTE(review): the MIR body of missing_arg_info queries
# llvm.amdgcn.workitem.id.z, while the attribute here names the x
# component -- confirm this mismatch is intentional.
--- |
define amdgpu_kernel void @test_workitem_id_x_unpacked() !reqd_work_group_size !0 {
ret void
}

define amdgpu_kernel void @test_workitem_id_y_unpacked() !reqd_work_group_size !0 {
ret void
}

define amdgpu_kernel void @test_workitem_id_z_unpacked() !reqd_work_group_size !0 {
ret void
}

define amdgpu_kernel void @test_workitem_id_x_packed() !reqd_work_group_size !0 {
ret void
}

define amdgpu_kernel void @test_workitem_id_y_packed() !reqd_work_group_size !0 {
ret void
}

define amdgpu_kernel void @test_workitem_id_z_packed() !reqd_work_group_size !0 {
ret void
}

define amdgpu_kernel void @missing_arg_info() "amdgpu-no-workitem-id-x" {
ret void
}

!0 = !{i32 256, i32 8, i32 4}
...
---
# Unpacked layout: argumentInfo gives each work-item ID component its own
# register ($vgpr0 / $vgpr1 / $vgpr2, no mask). The checks expect the
# workitem.id.x intrinsic to become a COPY from $vgpr0 followed by
# G_ASSERT_ZEXT with width 8. The width is consistent with the x dimension
# of 256 in !reqd_work_group_size (largest ID 255 needs 8 bits).
name: test_workitem_id_x_unpacked
machineFunctionInfo:
argumentInfo:
workGroupIDX: { reg: '$sgpr2' }
workItemIDX: { reg: '$vgpr0' }
workItemIDY: { reg: '$vgpr1' }
workItemIDZ: { reg: '$vgpr2' }
body: |
bb.0:
; GCN-LABEL: name: test_workitem_id_x_unpacked
; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 8
; GCN-NEXT: S_ENDPGM 0, implicit [[ASSERT_ZEXT]](s32)
%0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
S_ENDPGM 0, implicit %0
...

---
# Unpacked layout, y component: the checks expect a COPY from $vgpr1 (the
# register argumentInfo assigns to workItemIDY) followed by G_ASSERT_ZEXT
# with width 3. The width is consistent with the y dimension of 8 in
# !reqd_work_group_size (largest ID 7 needs 3 bits).
name: test_workitem_id_y_unpacked
machineFunctionInfo:
argumentInfo:
workGroupIDX: { reg: '$sgpr2' }
workItemIDX: { reg: '$vgpr0' }
workItemIDY: { reg: '$vgpr1' }
workItemIDZ: { reg: '$vgpr2' }
body: |
bb.0:
; GCN-LABEL: name: test_workitem_id_y_unpacked
; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 3
; GCN-NEXT: S_ENDPGM 0, implicit [[ASSERT_ZEXT]](s32)
%0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.y)
S_ENDPGM 0, implicit %0
...

---
# Unpacked layout, z component: the checks expect a COPY from $vgpr2 (the
# register argumentInfo assigns to workItemIDZ) followed by G_ASSERT_ZEXT
# with width 2. The width is consistent with the z dimension of 4 in
# !reqd_work_group_size (largest ID 3 needs 2 bits).
name: test_workitem_id_z_unpacked
machineFunctionInfo:
argumentInfo:
workGroupIDX: { reg: '$sgpr2' }
workItemIDX: { reg: '$vgpr0' }
workItemIDY: { reg: '$vgpr1' }
workItemIDZ: { reg: '$vgpr2' }
body: |
bb.0:
; GCN-LABEL: name: test_workitem_id_z_unpacked
; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 2
; GCN-NEXT: S_ENDPGM 0, implicit [[ASSERT_ZEXT]](s32)
%0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.z)
S_ENDPGM 0, implicit %0
...

---
# Packed layout: argumentInfo places all three work-item ID components in
# $vgpr0, distinguished by mask:
#   x: 1023       (0x3ff,      bits 0-9)
#   y: 1047552    (0xffc00,    bits 10-19)
#   z: 1072693248 (0x3ff00000, bits 20-29)
# For x the field starts at bit 0, so the checks expect only a G_AND with
# 1023 and no shift. No G_ASSERT_ZEXT is expected in the packed tests.
name: test_workitem_id_x_packed
machineFunctionInfo:
argumentInfo:
workItemIDX: { reg: '$vgpr0', mask: 1023 }
workItemIDY: { reg: '$vgpr0', mask: 1047552 }
workItemIDZ: { reg: '$vgpr0', mask: 1072693248 }
body: |
bb.0:
; GCN-LABEL: name: test_workitem_id_x_packed
; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023
; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
; GCN-NEXT: S_ENDPGM 0, implicit [[AND]](s32)
%0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
S_ENDPGM 0, implicit %0
...

---
# Packed layout, y component: all three IDs share $vgpr0 and the y mask is
# 1047552 (0xffc00, bits 10-19). The checks expect the field to be
# extracted as a G_LSHR by 10 followed by a G_AND with 1023.
name: test_workitem_id_y_packed
machineFunctionInfo:
argumentInfo:
workItemIDX: { reg: '$vgpr0', mask: 1023 }
workItemIDY: { reg: '$vgpr0', mask: 1047552 }
workItemIDZ: { reg: '$vgpr0', mask: 1072693248 }
body: |
bb.0:
; GCN-LABEL: name: test_workitem_id_y_packed
; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023
; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; GCN-NEXT: S_ENDPGM 0, implicit [[AND]](s32)
%0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.y)
S_ENDPGM 0, implicit %0
...

---
# Packed layout, z component: all three IDs share $vgpr0 and the z mask is
# 1072693248 (0x3ff00000, bits 20-29). The checks expect the field to be
# extracted as a G_LSHR by 20 followed by a G_AND with 1023.
name: test_workitem_id_z_packed
machineFunctionInfo:
argumentInfo:
workItemIDX: { reg: '$vgpr0', mask: 1023 }
workItemIDY: { reg: '$vgpr0', mask: 1047552 }
workItemIDZ: { reg: '$vgpr0', mask: 1072693248 }
body: |
bb.0:
; GCN-LABEL: name: test_workitem_id_z_packed
; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023
; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
; GCN-NEXT: S_ENDPGM 0, implicit [[AND]](s32)
%0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.z)
S_ENDPGM 0, implicit %0
...

---
# No machineFunctionInfo/argumentInfo is supplied for this function, so no
# register is described for any work-item ID. The checks expect the
# intrinsic to be replaced by G_IMPLICIT_DEF rather than a register read.
#
# NOTE(review): this body queries workitem.id.z, while the IR stub of the
# same name carries "amdgpu-no-workitem-id-x" -- confirm whether the
# intrinsic or the attribute should name the other component.
name: missing_arg_info
body: |
bb.0:
; GCN-LABEL: name: missing_arg_info
; GCN: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: S_ENDPGM 0, implicit [[DEF]](s32)
%0:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.z)
S_ENDPGM 0, implicit %0
...
102 changes: 102 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-zext.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=regbankselect %s -verify-machineinstrs -o - | FileCheck %s

---
# Source of the G_ASSERT_ZEXT is a copy from a VGPR with no bank assigned
# on input. After regbankselect the checks expect both the copy and the
# G_ASSERT_ZEXT result to be assigned the vgpr bank.
name: assert_zext_vgpr
alignment: 4
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: assert_zext_vgpr
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: %assert_zext:vgpr(s32) = G_ASSERT_ZEXT %copy, 4
; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(s32)
%copy:_(s32) = COPY $vgpr0
%assert_zext:_(s32) = G_ASSERT_ZEXT %copy, 4
S_ENDPGM 0, implicit %assert_zext
...

---
# Same shape as the VGPR case but the source is copied from $sgpr8. The
# checks expect both the copy and the G_ASSERT_ZEXT result to be assigned
# the sgpr bank.
name: assert_zext_sgpr
alignment: 4
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr8
; CHECK-LABEL: name: assert_zext_sgpr
; CHECK: liveins: $sgpr8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:sgpr(s32) = COPY $sgpr8
; CHECK-NEXT: %assert_zext:sgpr(s32) = G_ASSERT_ZEXT %copy, 4
; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(s32)
%copy:_(s32) = COPY $sgpr8
%assert_zext:_(s32) = G_ASSERT_ZEXT %copy, 4
S_ENDPGM 0, implicit %assert_zext
...

---
# Source is copied from $agpr0. The checks expect both the copy and the
# G_ASSERT_ZEXT result to be assigned the agpr bank.
name: assert_zext_agpr
alignment: 4
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $agpr0
; CHECK-LABEL: name: assert_zext_agpr
; CHECK: liveins: $agpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:agpr(s32) = COPY $agpr0
; CHECK-NEXT: %assert_zext:agpr(s32) = G_ASSERT_ZEXT %copy, 4
; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(s32)
%copy:_(s32) = COPY $agpr0
%assert_zext:_(s32) = G_ASSERT_ZEXT %copy, 4
S_ENDPGM 0, implicit %assert_zext
...

---
# Here the source vreg already carries a register class (vgpr_32) on input
# instead of being unassigned. The checks expect the copy to keep vgpr_32
# and the G_ASSERT_ZEXT result to be assigned the vgpr bank.
name: assert_zext_vgpr_regclass
alignment: 4
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: assert_zext_vgpr_regclass
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:vgpr_32(s32) = COPY $vgpr0
; CHECK-NEXT: %assert_zext:vgpr(s32) = G_ASSERT_ZEXT %copy, 4
; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(s32)
%copy:vgpr_32(s32) = COPY $vgpr0
%assert_zext:_(s32) = G_ASSERT_ZEXT %copy, 4
S_ENDPGM 0, implicit %assert_zext
...

---
# SGPR counterpart of the register-class case: the source vreg carries the
# sgpr_32 class on input. The checks expect the copy to keep sgpr_32 and
# the G_ASSERT_ZEXT result to be assigned the sgpr bank.
#
# NOTE(review): the function name is spelled "regcllass" (double 'l');
# the VGPR sibling uses "regclass". If renamed, the CHECK-LABEL line must
# be updated to match.
name: assert_zext_sgpr_regcllass
alignment: 4
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr8
; CHECK-LABEL: name: assert_zext_sgpr_regcllass
; CHECK: liveins: $sgpr8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:sgpr_32(s32) = COPY $sgpr8
; CHECK-NEXT: %assert_zext:sgpr(s32) = G_ASSERT_ZEXT %copy, 4
; CHECK-NEXT: S_ENDPGM 0, implicit %assert_zext(s32)
%copy:sgpr_32(s32) = COPY $sgpr8
%assert_zext:_(s32) = G_ASSERT_ZEXT %copy, 4
S_ENDPGM 0, implicit %assert_zext
...
220 changes: 76 additions & 144 deletions llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) {
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_mul_u32_u24_e32 v0, 2, v0
; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
Expand All @@ -79,7 +79,7 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) {
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_mul_u32_u24_e32 v0, 2, v0
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
Expand Down Expand Up @@ -114,7 +114,7 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) {
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_mul_u32_u24_e32 v0, 4, v0
; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_add_i32 s0, s0, 4
Expand All @@ -131,7 +131,7 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) {
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_mul_u32_u24_e32 v0, 4, v0
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: v_add_u32_e32 v1, s0, v1
Expand Down Expand Up @@ -218,7 +218,7 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) {
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_mul_u32_u24_e32 v0, 2, v0
; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1
Expand All @@ -236,7 +236,7 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) {
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_mul_u32_u24_e32 v0, 2, v0
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1
Expand Down Expand Up @@ -272,7 +272,7 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) {
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_mul_u32_u24_e32 v0, 4, v0
; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 1
Expand All @@ -290,7 +290,7 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) {
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_mul_u32_u24_e32 v0, 4, v0
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 1
Expand Down Expand Up @@ -378,7 +378,7 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) {
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_mul_u32_u24_e32 v0, 2, v0
; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 2
Expand All @@ -396,7 +396,7 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) {
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_mul_u32_u24_e32 v0, 2, v0
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2
Expand Down Expand Up @@ -432,7 +432,7 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) {
; GFX940-SDAG: ; %bb.0: ; %bb
; GFX940-SDAG-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX940-SDAG-NEXT: v_mul_u32_u24_e32 v0, 4, v0
; GFX940-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-SDAG-NEXT: v_mov_b32_e32 v2, 2
; GFX940-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-SDAG-NEXT: s_lshl_b32 s0, s0, 2
Expand All @@ -450,7 +450,7 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) {
; GFX940-GISEL: ; %bb.0: ; %bb
; GFX940-GISEL-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-GISEL-NEXT: v_mov_b32_e32 v1, 4
; GFX940-GISEL-NEXT: v_mul_u32_u24_e32 v0, 4, v0
; GFX940-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX940-GISEL-NEXT: v_mov_b32_e32 v2, 1
; GFX940-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-GISEL-NEXT: s_lshl_b32 s0, s0, 2
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/memory_clause.ll
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,8 @@ define void @mubuf_clause(<4 x i32> addrspace(5)* noalias nocapture readonly %ar
; GCN-LABEL: mubuf_clause:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v2, 0x3ff, v31
; GCN-NEXT: v_lshlrev_b32_e32 v2, 4, v2
; GCN-NEXT: v_lshlrev_b32_e32 v2, 4, v31
; GCN-NEXT: v_and_b32_e32 v2, 0x3ff0, v2
; GCN-NEXT: v_add_u32_e32 v0, v0, v2
; GCN-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:12
; GCN-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8
Expand Down Expand Up @@ -211,8 +211,8 @@ define void @mubuf_clause(<4 x i32> addrspace(5)* noalias nocapture readonly %ar
; GCN-SCRATCH: ; %bb.0: ; %bb
; GCN-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-SCRATCH-NEXT: v_and_b32_e32 v2, 0x3ff, v31
; GCN-SCRATCH-NEXT: v_lshlrev_b32_e32 v18, 4, v2
; GCN-SCRATCH-NEXT: v_lshlrev_b32_e32 v2, 4, v31
; GCN-SCRATCH-NEXT: v_and_b32_e32 v18, 0x3ff0, v2
; GCN-SCRATCH-NEXT: v_add_nc_u32_e32 v0, v0, v18
; GCN-SCRATCH-NEXT: s_clause 0x3
; GCN-SCRATCH-NEXT: scratch_load_dwordx4 v[2:5], v0, off
Expand Down
33 changes: 0 additions & 33 deletions llvm/test/CodeGen/AMDGPU/zext-lid.ll
Original file line number Diff line number Diff line change
@@ -1,14 +1,8 @@
; RUN: llc -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=GCN,O2 %s
; RUN: llc -O0 -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-intrinsics < %s | FileCheck -check-prefix=OPT %s

; GCN-LABEL: {{^}}zext_grp_size_128:
; GCN-NOT: and_b32

; OPT-LABEL: @zext_grp_size_128
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !0
; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !0
define amdgpu_kernel void @zext_grp_size_128(i32 addrspace(1)* nocapture %arg) #0 {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
Expand All @@ -27,11 +21,6 @@ bb:

; GCN-LABEL: {{^}}zext_grp_size_32x4x1:
; GCN-NOT: and_b32

; OPT-LABEL: @zext_grp_size_32x4x1
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !2
; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !3
; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !4
define amdgpu_kernel void @zext_grp_size_32x4x1(i32 addrspace(1)* nocapture %arg) #0 !reqd_work_group_size !0 {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
Expand All @@ -53,8 +42,6 @@ bb:

; When EarlyCSE is not run this call produces a range max with 0 active bits,
; which is a special case as an AssertZext from width 0 is invalid.
; OPT-LABEL: @zext_grp_size_1x1x1
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !4
define amdgpu_kernel void @zext_grp_size_1x1x1(i32 addrspace(1)* nocapture %arg) #0 !reqd_work_group_size !1 {
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = and i32 %tmp, 1
Expand All @@ -64,11 +51,6 @@ define amdgpu_kernel void @zext_grp_size_1x1x1(i32 addrspace(1)* nocapture %arg)

; GCN-LABEL: {{^}}zext_grp_size_512:
; GCN-NOT: and_b32

; OPT-LABEL: @zext_grp_size_512
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !6
; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !6
; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !6
define amdgpu_kernel void @zext_grp_size_512(i32 addrspace(1)* nocapture %arg) #1 {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
Expand All @@ -89,9 +71,6 @@ bb:
; O2-NOT: and_b32
; O2: v_and_b32_e32 v{{[0-9]+}}, 0x3ff,
; O2-NOT: and_b32

; OPT-LABEL: @func_test_workitem_id_x_known_max_range(
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
define void @func_test_workitem_id_x_known_max_range(i32 addrspace(1)* nocapture %out) #0 {
entry:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
Expand All @@ -104,9 +83,6 @@ entry:
; O2-NOT: and_b32
; O2: v_and_b32_e32 v{{[0-9]+}}, 0x3ff,
; O2-NOT: and_b32

; OPT-LABEL: @func_test_workitem_id_x_default_range(
; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !7
define void @func_test_workitem_id_x_default_range(i32 addrspace(1)* nocapture %out) #4 {
entry:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
Expand All @@ -129,12 +105,3 @@ attributes #4 = { nounwind }

!0 = !{i32 32, i32 4, i32 1}
!1 = !{i32 1, i32 1, i32 1}

; OPT: !0 = !{i32 0, i32 128}
; OPT: !1 = !{i32 32, i32 4, i32 1}
; OPT: !2 = !{i32 0, i32 32}
; OPT: !3 = !{i32 0, i32 4}
; OPT: !4 = !{i32 0, i32 1}
; OPT: !5 = !{i32 1, i32 1, i32 1}
; OPT: !6 = !{i32 0, i32 512}
; OPT: !7 = !{i32 0, i32 1024}
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,29 @@

name: test
legalized: true
regBankSelected: true
regBankSelected: false
body: |
bb.0:
liveins: $w0, $w1
%bank:gpr(s32) = COPY $w0
%class:gpr32(s32) = COPY $w1
; CHECK: *** Bad machine code: G_ASSERT_SEXT source and destination register banks must match ***
; CHECK: *** Bad machine code: G_ASSERT_SEXT cannot change register bank ***
; CHECK: instruction: %bank_mismatch:fpr(s32) = G_ASSERT_SEXT %bank:gpr, 16
%bank_mismatch:fpr(s32) = G_ASSERT_SEXT %bank, 16
; CHECK: *** Bad machine code: G_ASSERT_SEXT source and destination register classes must match ***
; CHECK: instruction: %class_mismatch_gpr:gpr32all(s32) = G_ASSERT_SEXT %class:gpr32, 16
%class_mismatch_gpr:gpr32all(s32) = G_ASSERT_SEXT %class, 16
; CHECK: *** Bad machine code: G_ASSERT_SEXT source and destination register classes must match ***
; CHECK: *** Bad machine code: G_ASSERT_SEXT cannot change register bank ***
; CHECK: instruction: %class_mismatch_fpr:fpr32(s32) = G_ASSERT_SEXT %class:gpr32, 16
%class_mismatch_fpr:fpr32(s32) = G_ASSERT_SEXT %class, 16
; CHECK: *** Bad machine code: G_ASSERT_SEXT source and destination register banks must match ***
; CHECK: *** Bad machine code: G_ASSERT_SEXT source and destination register classes must match ***
; CHECK: instruction: %dst_has_class_src_has_bank:gpr32all(s32) = G_ASSERT_SEXT %bank:gpr, 16
%dst_has_class_src_has_bank:gpr32all(s32) = G_ASSERT_SEXT %bank, 16
; CHECK: *** Bad machine code: G_ASSERT_SEXT source and destination register banks must match ***
; CHECK: instruction: %dst_has_bank_src_has_class:gpr(s32) = G_ASSERT_SEXT %class:gpr32, 16
%dst_has_bank_src_has_class:gpr(s32) = G_ASSERT_SEXT %class, 16
; CHECK: *** Bad machine code: Generic instruction cannot have physical register ***
; CHECK: instruction: %implicit_physreg:gpr(s32) = G_ASSERT_SEXT %class:gpr32, 16, implicit-def $w0
%implicit_physreg:gpr(s32) = G_ASSERT_SEXT %class, 16, implicit-def $w0
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,39 @@

name: test
legalized: true
regBankSelected: true
regBankSelected: false
body: |
bb.0:
liveins: $w0, $w1
%bank:gpr(s32) = COPY $w0
%class:gpr32(s32) = COPY $w1
; CHECK: *** Bad machine code: G_ASSERT_ZEXT source and destination register banks must match ***
; CHECK: *** Bad machine code: G_ASSERT_ZEXT cannot change register bank ***
; CHECK: instruction: %bank_mismatch:fpr(s32) = G_ASSERT_ZEXT %bank:gpr, 16
%bank_mismatch:fpr(s32) = G_ASSERT_ZEXT %bank, 16
; CHECK: *** Bad machine code: G_ASSERT_ZEXT source and destination register classes must match ***
; CHECK: instruction: %class_mismatch_gpr:gpr32all(s32) = G_ASSERT_ZEXT %class:gpr32, 16
%class_mismatch_gpr:gpr32all(s32) = G_ASSERT_ZEXT %class, 16
; CHECK: *** Bad machine code: G_ASSERT_ZEXT source and destination register classes must match ***
; CHECK: *** Bad machine code: G_ASSERT_ZEXT cannot change register bank ***
; CHECK: instruction: %class_mismatch_fpr:fpr32(s32) = G_ASSERT_ZEXT %class:gpr32, 16
%class_mismatch_fpr:fpr32(s32) = G_ASSERT_ZEXT %class, 16
; CHECK: *** Bad machine code: G_ASSERT_ZEXT source and destination register banks must match ***
; CHECK: *** Bad machine code: G_ASSERT_ZEXT source and destination register classes must match ***
; CHECK: instruction: %dst_has_class_src_has_bank:gpr32all(s32) = G_ASSERT_ZEXT %bank:gpr, 16
%dst_has_class_src_has_bank:gpr32all(s32) = G_ASSERT_ZEXT %bank, 16
; CHECK: *** Bad machine code: G_ASSERT_ZEXT source and destination register banks must match ***
; CHECK: instruction: %dst_has_bank_src_has_class:gpr(s32) = G_ASSERT_ZEXT %class:gpr32, 16
%dst_has_bank_src_has_class:gpr(s32) = G_ASSERT_ZEXT %class, 16
; CHECK: *** Bad machine code: Generic instruction cannot have physical register ***
; CHECK: instruction: %implicit_physreg:gpr(s32) = G_ASSERT_ZEXT %class:gpr32, 16, implicit-def $w0
%implicit_physreg:gpr(s32) = G_ASSERT_ZEXT %class, 16, implicit-def $w0
%nothing:_(s32) = G_IMPLICIT_DEF
; CHECK: *** Bad machine code: G_ASSERT_ZEXT cannot change register bank ***
; CHECK: %only_dst_has_bank:gpr(s32) = G_ASSERT_ZEXT %nothing:_, 4
%only_dst_has_bank:gpr(s32) = G_ASSERT_ZEXT %nothing, 4
; CHECK: *** Bad machine code: G_ASSERT_ZEXT cannot change register bank ***
; CHECK: %only_dst_has_class:gpr32all(s32) = G_ASSERT_ZEXT %nothing:_, 4
%only_dst_has_class:gpr32all(s32) = G_ASSERT_ZEXT %nothing, 4