Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2094,11 +2094,21 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
break;

case AMDGPU::SI_SPILL_S32_TO_VGPR:
MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
mutateAndCleanupImplicit(MI, get(AMDGPU::V_WRITELANE_B32));
// When leftover implicit-def operands are removed, the kill flag is no longer
// valid. Thus:
// $X = SI_SPILL_S32_TO_VGPR killed $sgpr0, 0, $X(tied-def 0),
// implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
// must be converted to:
// $X = V_WRITELANE_B32 $sgpr0, 0, $X(tied-def 0)
MI.getOperand(1).setIsKill(false);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This part seems plausible

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tested in invalid-kill.mir.

// Sometimes an SGPR that has already been killed is spilled.
// Add undef to appease the MachineVerifier.
MI.getOperand(1).setIsUndef(true);
Comment on lines +2105 to +2107
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This part does not. Can you add a MIR test function for these cases?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tested in spilled-kill.mir.

break;

case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
MI.setDesc(get(AMDGPU::V_READLANE_B32));
mutateAndCleanupImplicit(MI, get(AMDGPU::V_READLANE_B32));
break;
case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
Register Dst = MI.getOperand(0).getReg();
Expand Down
1,272 changes: 636 additions & 636 deletions llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll

Large diffs are not rendered by default.

62 changes: 31 additions & 31 deletions llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll
Original file line number Diff line number Diff line change
Expand Up @@ -66739,11 +66739,9 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
; SI-NEXT: v_writelane_b32 v21, s17, 13
; SI-NEXT: .LBB97_3: ; %end
; SI-NEXT: v_readlane_b32 s18, v21, 0
; SI-NEXT: v_readlane_b32 s19, v21, 1
; SI-NEXT: s_and_b32 s16, s40, 0xff
; SI-NEXT: s_lshl_b32 s17, s18, 8
; SI-NEXT: v_readlane_b32 s18, v21, 2
; SI-NEXT: s_and_b32 s16, s40, 0xff
; SI-NEXT: v_readlane_b32 s19, v21, 3
; SI-NEXT: s_or_b32 s16, s16, s17
; SI-NEXT: s_and_b32 s17, s18, 0xff
; SI-NEXT: v_readlane_b32 s18, v21, 4
Expand All @@ -66765,9 +66763,8 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
; SI-NEXT: v_mov_b32_e32 v2, s16
; SI-NEXT: v_readlane_b32 s16, v21, 6
; SI-NEXT: s_and_b32 s14, s14, 0xff
; SI-NEXT: v_readlane_b32 s17, v21, 7
; SI-NEXT: s_lshl_b32 s16, s16, 8
; SI-NEXT: v_readlane_b32 s19, v21, 5
; SI-NEXT: v_readlane_b32 s17, v21, 7
; SI-NEXT: s_or_b32 s14, s14, s16
; SI-NEXT: v_readlane_b32 s16, v21, 8
; SI-NEXT: v_readlane_b32 s17, v21, 9
Expand Down Expand Up @@ -66799,8 +66796,8 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
; SI-NEXT: v_mov_b32_e32 v2, s14
; SI-NEXT: v_readlane_b32 s14, v21, 12
; SI-NEXT: s_and_b32 s10, s10, 0xff
; SI-NEXT: v_readlane_b32 s15, v21, 13
; SI-NEXT: s_lshl_b32 s14, s14, 8
; SI-NEXT: v_readlane_b32 s15, v21, 13
; SI-NEXT: s_or_b32 s10, s10, s14
; SI-NEXT: v_readlane_b32 s14, v21, 14
; SI-NEXT: v_readlane_b32 s15, v21, 15
Expand Down Expand Up @@ -66951,10 +66948,13 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
; SI-NEXT: s_and_b32 s5, s89, 0xff
; SI-NEXT: s_lshl_b32 s5, s5, 16
; SI-NEXT: s_lshl_b32 s6, s91, 24
; SI-NEXT: v_readlane_b32 s19, v21, 1
; SI-NEXT: s_and_b32 s4, s4, 0xffff
; SI-NEXT: s_or_b32 s5, s6, s5
; SI-NEXT: v_readlane_b32 s19, v21, 3
; SI-NEXT: v_add_i32_e32 v1, vcc, 56, v0
; SI-NEXT: s_or_b32 s4, s4, s5
; SI-NEXT: v_readlane_b32 s19, v21, 5
; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0
; SI-NEXT: v_mov_b32_e32 v1, s4
Expand Down Expand Up @@ -67009,6 +67009,28 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
; SI-NEXT: v_writelane_b32 v21, s4, 0
; SI-NEXT: v_writelane_b32 v21, s5, 1
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 2
; SI-NEXT: v_writelane_b32 v21, s5, 3
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 4
; SI-NEXT: v_writelane_b32 v21, s5, 5
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 6
; SI-NEXT: v_writelane_b32 v21, s5, 7
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 8
; SI-NEXT: v_writelane_b32 v21, s5, 9
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 10
; SI-NEXT: v_writelane_b32 v21, s5, 11
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 12
; SI-NEXT: v_writelane_b32 v21, s5, 13
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 14
; SI-NEXT: v_writelane_b32 v21, s5, 15
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 16
; SI-NEXT: ; implicit-def: $sgpr40
; SI-NEXT: ; implicit-def: $sgpr60
; SI-NEXT: ; implicit-def: $sgpr74
Expand Down Expand Up @@ -67036,6 +67058,7 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
; SI-NEXT: ; implicit-def: $sgpr79
; SI-NEXT: ; implicit-def: $sgpr89
; SI-NEXT: ; implicit-def: $sgpr91
; SI-NEXT: v_writelane_b32 v21, s5, 17
; SI-NEXT: ; implicit-def: $sgpr42
; SI-NEXT: ; implicit-def: $sgpr66
; SI-NEXT: ; implicit-def: $sgpr64
Expand All @@ -67052,33 +67075,10 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
; SI-NEXT: ; implicit-def: $sgpr30
; SI-NEXT: ; implicit-def: $sgpr94
; SI-NEXT: ; implicit-def: $sgpr92
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: ; implicit-def: $sgpr90
; SI-NEXT: ; implicit-def: $sgpr88
; SI-NEXT: ; implicit-def: $sgpr78
; SI-NEXT: v_writelane_b32 v21, s4, 2
; SI-NEXT: v_writelane_b32 v21, s5, 3
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 4
; SI-NEXT: v_writelane_b32 v21, s5, 5
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 6
; SI-NEXT: v_writelane_b32 v21, s5, 7
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 8
; SI-NEXT: v_writelane_b32 v21, s5, 9
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 10
; SI-NEXT: v_writelane_b32 v21, s5, 11
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 12
; SI-NEXT: v_writelane_b32 v21, s5, 13
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 14
; SI-NEXT: v_writelane_b32 v21, s5, 15
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 16
; SI-NEXT: v_writelane_b32 v21, s5, 17
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: s_branch .LBB97_2
;
; VI-LABEL: bitcast_v32i16_to_v64i8_scalar:
Expand Down Expand Up @@ -88402,8 +88402,8 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a,
; SI-NEXT: s_lshr_b64 s[4:5], s[74:75], 24
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_writelane_b32 v41, s4, 0
; SI-NEXT: v_writelane_b32 v41, s5, 1
; SI-NEXT: v_readfirstlane_b32 s4, v6
; SI-NEXT: v_writelane_b32 v41, s5, 1
; SI-NEXT: s_lshr_b32 s5, s4, 16
; SI-NEXT: v_readfirstlane_b32 s4, v7
; SI-NEXT: s_lshr_b64 s[60:61], s[4:5], 16
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,8 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: v_readlane_b32 s0, v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, vcc_lo
; GCN-NEXT: v_readlane_b32 s0, v0, 0
; GCN-NEXT: v_readlane_b32 s1, v0, 1
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: ;;#ASMSTART
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ body: |
; CHECK: S_NOP 0, implicit-def $exec_lo
; CHECK-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 undef $sgpr0, 0, $vgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
Expand All @@ -40,7 +40,7 @@ body: |
; CHECK: S_NOP 0, implicit-def $exec_hi
; CHECK-NEXT: $sgpr0 = S_MOV_B32 $exec_hi
; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 undef $sgpr0, 0, $vgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
Expand All @@ -65,12 +65,12 @@ body: |
; CHECK: S_NOP 0, implicit-def $exec
; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 undef $sgpr0, 0, $vgpr0
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 undef $sgpr1, 1, $vgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1, implicit killed renamable $sgpr0_sgpr1
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
Expand All @@ -95,7 +95,7 @@ body: |
; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec_lo
; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_lo
; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 undef $sgpr0, 0, $vgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
Expand All @@ -118,7 +118,7 @@ body: |
; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec_hi
; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_hi
; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 undef $sgpr0, 0, $vgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
Expand All @@ -141,12 +141,12 @@ body: |
; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec
; CHECK: S_NOP 0, implicit-def renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def $exec
; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 undef $sgpr0, 0, $vgpr0
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 undef $sgpr1, 1, $vgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ body: |
; CHECK: S_NOP 0, implicit-def $m0
; CHECK-NEXT: $sgpr0 = S_MOV_B32 $m0
; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 undef $sgpr0, 0, $vgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
Expand Down Expand Up @@ -45,7 +45,7 @@ body: |
; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_m0
; CHECK: $vgpr0 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $m0
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 undef $sgpr0, 0, $vgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
Expand Down
38 changes: 19 additions & 19 deletions llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: s_load_dwordx16 s[8:23], s[68:69], 0x130
; CHECK-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane
; CHECK-NEXT: v_writelane_b32 v6, s70, 20
; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: v_writelane_b32 v6, s71, 21
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v1, s4
; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: v_writelane_b32 v7, s8, 0
; CHECK-NEXT: v_writelane_b32 v7, s9, 1
; CHECK-NEXT: v_writelane_b32 v7, s10, 2
Expand Down Expand Up @@ -76,28 +76,28 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: v_writelane_b32 v7, s64, 28
; CHECK-NEXT: v_writelane_b32 v7, s65, 29
; CHECK-NEXT: v_writelane_b32 v7, s66, 30
; CHECK-NEXT: v_writelane_b32 v7, s67, 31
; CHECK-NEXT: s_load_dwordx16 s[8:23], s[68:69], 0x1f0
; CHECK-NEXT: s_load_dwordx16 s[36:51], s[68:69], 0x2f0
; CHECK-NEXT: s_mov_b32 s69, s68
; CHECK-NEXT: s_mov_b32 s70, s68
; CHECK-NEXT: s_mov_b32 s71, s68
; CHECK-NEXT: v_writelane_b32 v7, s67, 31
; CHECK-NEXT: image_sample_lz v1, v[1:2], s[60:67], s[68:71] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s52, v7, 0
; CHECK-NEXT: v_mov_b32_e32 v3, v2
; CHECK-NEXT: v_readlane_b32 s52, v7, 0
; CHECK-NEXT: v_readlane_b32 s53, v7, 1
; CHECK-NEXT: v_readlane_b32 s54, v7, 2
; CHECK-NEXT: v_readlane_b32 s55, v7, 3
; CHECK-NEXT: v_readlane_b32 s56, v7, 4
; CHECK-NEXT: v_readlane_b32 s57, v7, 5
; CHECK-NEXT: v_readlane_b32 s58, v7, 6
; CHECK-NEXT: v_readlane_b32 s59, v7, 7
; CHECK-NEXT: image_sample_lz v1, v[1:2], s[60:67], s[68:71] dmask:0x1
; CHECK-NEXT: v_and_b32_e32 v5, 1, v0
; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v5
; CHECK-NEXT: v_readlane_b32 s60, v7, 8
; CHECK-NEXT: v_readlane_b32 s61, v7, 9
; CHECK-NEXT: v_readlane_b32 s62, v7, 10
; CHECK-NEXT: image_sample_lz v4, v[2:3], s[52:59], s[68:71] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s62, v7, 10
; CHECK-NEXT: v_readlane_b32 s63, v7, 11
; CHECK-NEXT: v_readlane_b32 s64, v7, 12
; CHECK-NEXT: v_readlane_b32 s65, v7, 13
Expand All @@ -109,7 +109,6 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
; CHECK-NEXT: s_cbranch_execz .LBB0_3
; CHECK-NEXT: ; %bb.1: ; %bb48
; CHECK-NEXT: v_readlane_b32 s52, v7, 16
; CHECK-NEXT: v_readlane_b32 s60, v7, 24
; CHECK-NEXT: v_readlane_b32 s61, v7, 25
; CHECK-NEXT: v_readlane_b32 s62, v7, 26
Expand All @@ -120,10 +119,11 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: v_readlane_b32 s67, v7, 31
; CHECK-NEXT: v_mov_b32_e32 v1, v2
; CHECK-NEXT: s_and_b64 vcc, exec, -1
; CHECK-NEXT: v_readlane_b32 s52, v7, 16
; CHECK-NEXT: v_readlane_b32 s53, v7, 17
; CHECK-NEXT: v_readlane_b32 s54, v7, 18
; CHECK-NEXT: v_readlane_b32 s55, v7, 19
; CHECK-NEXT: image_sample_lz v3, v[2:3], s[60:67], s[68:71] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s55, v7, 19
; CHECK-NEXT: v_readlane_b32 s56, v7, 20
; CHECK-NEXT: v_readlane_b32 s57, v7, 21
; CHECK-NEXT: v_readlane_b32 s58, v7, 22
Expand Down Expand Up @@ -152,27 +152,25 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: s_mov_b32 s16, 0
; CHECK-NEXT: s_mov_b32 s17, s16
; CHECK-NEXT: v_mov_b32_e32 v0, s16
; CHECK-NEXT: v_readlane_b32 s44, v7, 16
; CHECK-NEXT: v_readlane_b32 s52, v7, 24
; CHECK-NEXT: v_readlane_b32 s53, v7, 25
; CHECK-NEXT: v_readlane_b32 s54, v7, 26
; CHECK-NEXT: v_readlane_b32 s55, v7, 27
; CHECK-NEXT: v_readlane_b32 s56, v7, 28
; CHECK-NEXT: v_readlane_b32 s57, v7, 29
; CHECK-NEXT: v_readlane_b32 s58, v7, 30
; CHECK-NEXT: v_readlane_b32 s59, v7, 31
; CHECK-NEXT: v_mov_b32_e32 v1, s17
; CHECK-NEXT: s_mov_b32 s18, s16
; CHECK-NEXT: s_mov_b32 s19, s16
; CHECK-NEXT: v_readlane_b32 s44, v7, 16
; CHECK-NEXT: v_readlane_b32 s45, v7, 17
; CHECK-NEXT: v_readlane_b32 s46, v7, 18
; CHECK-NEXT: v_readlane_b32 s47, v7, 19
; CHECK-NEXT: v_readlane_b32 s48, v7, 20
; CHECK-NEXT: v_readlane_b32 s49, v7, 21
; CHECK-NEXT: v_readlane_b32 s50, v7, 22
; CHECK-NEXT: v_readlane_b32 s51, v7, 23
; CHECK-NEXT: v_readlane_b32 s52, v7, 24
; CHECK-NEXT: v_readlane_b32 s53, v7, 25
; CHECK-NEXT: v_readlane_b32 s54, v7, 26
; CHECK-NEXT: v_readlane_b32 s55, v7, 27
; CHECK-NEXT: v_readlane_b32 s56, v7, 28
; CHECK-NEXT: v_readlane_b32 s57, v7, 29
; CHECK-NEXT: v_readlane_b32 s58, v7, 30
; CHECK-NEXT: v_readlane_b32 s59, v7, 31
; CHECK-NEXT: image_sample_lz v2, v[0:1], s[44:51], s[16:19] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s44, v7, 0
; CHECK-NEXT: v_readlane_b32 s52, v7, 8
; CHECK-NEXT: v_readlane_b32 s53, v7, 9
; CHECK-NEXT: v_readlane_b32 s54, v7, 10
Expand All @@ -181,12 +179,14 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: v_readlane_b32 s57, v7, 13
; CHECK-NEXT: v_readlane_b32 s58, v7, 14
; CHECK-NEXT: v_readlane_b32 s59, v7, 15
; CHECK-NEXT: image_sample_lz v2, v[0:1], s[44:51], s[16:19] dmask:0x1
; CHECK-NEXT: v_mov_b32_e32 v3, 0
; CHECK-NEXT: v_mov_b32_e32 v4, v3
; CHECK-NEXT: v_readlane_b32 s44, v7, 0
; CHECK-NEXT: v_readlane_b32 s45, v7, 1
; CHECK-NEXT: image_sample_lz v0, v[0:1], s[52:59], s[24:27] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s46, v7, 2
; CHECK-NEXT: v_readlane_b32 s47, v7, 3
; CHECK-NEXT: image_sample_lz v0, v[0:1], s[52:59], s[24:27] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s48, v7, 4
; CHECK-NEXT: v_readlane_b32 s49, v7, 5
; CHECK-NEXT: v_readlane_b32 s50, v7, 6
Expand Down
Loading
Loading