Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2094,11 +2094,11 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
break;

case AMDGPU::SI_SPILL_S32_TO_VGPR:
mutateAndCleanupImplicit(MI, get(AMDGPU::V_WRITELANE_B32));
MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
break;

case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
mutateAndCleanupImplicit(MI, get(AMDGPU::V_READLANE_B32));
MI.setDesc(get(AMDGPU::V_READLANE_B32));
break;
case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
Register Dst = MI.getOperand(0).getReg();
Expand Down
1,272 changes: 636 additions & 636 deletions llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll

Large diffs are not rendered by default.

62 changes: 31 additions & 31 deletions llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll
Original file line number Diff line number Diff line change
Expand Up @@ -66739,9 +66739,11 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
; SI-NEXT: v_writelane_b32 v21, s17, 13
; SI-NEXT: .LBB97_3: ; %end
; SI-NEXT: v_readlane_b32 s18, v21, 0
; SI-NEXT: s_and_b32 s16, s40, 0xff
; SI-NEXT: v_readlane_b32 s19, v21, 1
; SI-NEXT: s_lshl_b32 s17, s18, 8
; SI-NEXT: v_readlane_b32 s18, v21, 2
; SI-NEXT: s_and_b32 s16, s40, 0xff
; SI-NEXT: v_readlane_b32 s19, v21, 3
; SI-NEXT: s_or_b32 s16, s16, s17
; SI-NEXT: s_and_b32 s17, s18, 0xff
; SI-NEXT: v_readlane_b32 s18, v21, 4
Expand All @@ -66763,8 +66765,9 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
; SI-NEXT: v_mov_b32_e32 v2, s16
; SI-NEXT: v_readlane_b32 s16, v21, 6
; SI-NEXT: s_and_b32 s14, s14, 0xff
; SI-NEXT: s_lshl_b32 s16, s16, 8
; SI-NEXT: v_readlane_b32 s17, v21, 7
; SI-NEXT: s_lshl_b32 s16, s16, 8
; SI-NEXT: v_readlane_b32 s19, v21, 5
; SI-NEXT: s_or_b32 s14, s14, s16
; SI-NEXT: v_readlane_b32 s16, v21, 8
; SI-NEXT: v_readlane_b32 s17, v21, 9
Expand Down Expand Up @@ -66796,8 +66799,8 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
; SI-NEXT: v_mov_b32_e32 v2, s14
; SI-NEXT: v_readlane_b32 s14, v21, 12
; SI-NEXT: s_and_b32 s10, s10, 0xff
; SI-NEXT: s_lshl_b32 s14, s14, 8
; SI-NEXT: v_readlane_b32 s15, v21, 13
; SI-NEXT: s_lshl_b32 s14, s14, 8
; SI-NEXT: s_or_b32 s10, s10, s14
; SI-NEXT: v_readlane_b32 s14, v21, 14
; SI-NEXT: v_readlane_b32 s15, v21, 15
Expand Down Expand Up @@ -66948,13 +66951,10 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
; SI-NEXT: s_and_b32 s5, s89, 0xff
; SI-NEXT: s_lshl_b32 s5, s5, 16
; SI-NEXT: s_lshl_b32 s6, s91, 24
; SI-NEXT: v_readlane_b32 s19, v21, 1
; SI-NEXT: s_and_b32 s4, s4, 0xffff
; SI-NEXT: s_or_b32 s5, s6, s5
; SI-NEXT: v_readlane_b32 s19, v21, 3
; SI-NEXT: v_add_i32_e32 v1, vcc, 56, v0
; SI-NEXT: s_or_b32 s4, s4, s5
; SI-NEXT: v_readlane_b32 s19, v21, 5
; SI-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen
; SI-NEXT: v_add_i32_e32 v0, vcc, 60, v0
; SI-NEXT: v_mov_b32_e32 v1, s4
Expand Down Expand Up @@ -67009,28 +67009,6 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
; SI-NEXT: v_writelane_b32 v21, s4, 0
; SI-NEXT: v_writelane_b32 v21, s5, 1
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 2
; SI-NEXT: v_writelane_b32 v21, s5, 3
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 4
; SI-NEXT: v_writelane_b32 v21, s5, 5
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 6
; SI-NEXT: v_writelane_b32 v21, s5, 7
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 8
; SI-NEXT: v_writelane_b32 v21, s5, 9
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 10
; SI-NEXT: v_writelane_b32 v21, s5, 11
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 12
; SI-NEXT: v_writelane_b32 v21, s5, 13
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 14
; SI-NEXT: v_writelane_b32 v21, s5, 15
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 16
; SI-NEXT: ; implicit-def: $sgpr40
; SI-NEXT: ; implicit-def: $sgpr60
; SI-NEXT: ; implicit-def: $sgpr74
Expand Down Expand Up @@ -67058,7 +67036,6 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
; SI-NEXT: ; implicit-def: $sgpr79
; SI-NEXT: ; implicit-def: $sgpr89
; SI-NEXT: ; implicit-def: $sgpr91
; SI-NEXT: v_writelane_b32 v21, s5, 17
; SI-NEXT: ; implicit-def: $sgpr42
; SI-NEXT: ; implicit-def: $sgpr66
; SI-NEXT: ; implicit-def: $sgpr64
Expand All @@ -67075,10 +67052,33 @@ define inreg <64 x i8> @bitcast_v32i16_to_v64i8_scalar(<32 x i16> inreg %a, i32
; SI-NEXT: ; implicit-def: $sgpr30
; SI-NEXT: ; implicit-def: $sgpr94
; SI-NEXT: ; implicit-def: $sgpr92
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: ; implicit-def: $sgpr90
; SI-NEXT: ; implicit-def: $sgpr88
; SI-NEXT: ; implicit-def: $sgpr78
; SI-NEXT: v_writelane_b32 v21, s4, 2
; SI-NEXT: v_writelane_b32 v21, s5, 3
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 4
; SI-NEXT: v_writelane_b32 v21, s5, 5
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 6
; SI-NEXT: v_writelane_b32 v21, s5, 7
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 8
; SI-NEXT: v_writelane_b32 v21, s5, 9
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 10
; SI-NEXT: v_writelane_b32 v21, s5, 11
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 12
; SI-NEXT: v_writelane_b32 v21, s5, 13
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 14
; SI-NEXT: v_writelane_b32 v21, s5, 15
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: v_writelane_b32 v21, s4, 16
; SI-NEXT: v_writelane_b32 v21, s5, 17
; SI-NEXT: ; implicit-def: $sgpr4
; SI-NEXT: s_branch .LBB97_2
;
; VI-LABEL: bitcast_v32i16_to_v64i8_scalar:
Expand Down Expand Up @@ -88402,8 +88402,8 @@ define inreg <64 x i8> @bitcast_v32bf16_to_v64i8_scalar(<32 x bfloat> inreg %a,
; SI-NEXT: s_lshr_b64 s[4:5], s[74:75], 24
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_writelane_b32 v41, s4, 0
; SI-NEXT: v_readfirstlane_b32 s4, v6
; SI-NEXT: v_writelane_b32 v41, s5, 1
; SI-NEXT: v_readfirstlane_b32 s4, v6
; SI-NEXT: s_lshr_b32 s5, s4, 16
; SI-NEXT: v_readfirstlane_b32 s4, v7
; SI-NEXT: s_lshr_b64 s[60:61], s[4:5], 16
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,8 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: v_mov_b32_e32 v1, vcc_lo
; GCN-NEXT: v_readlane_b32 s0, v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, vcc_lo
; GCN-NEXT: v_readlane_b32 s1, v0, 1
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: ;;#ASMSTART
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,12 @@ body: |
; CHECK: S_NOP 0, implicit-def $exec
; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1, implicit killed renamable $sgpr0_sgpr1
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
Expand Down Expand Up @@ -141,12 +141,12 @@ body: |
; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec
; CHECK: S_NOP 0, implicit-def renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def $exec
; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
Expand Down
38 changes: 19 additions & 19 deletions llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: s_load_dwordx16 s[8:23], s[68:69], 0x130
; CHECK-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane
; CHECK-NEXT: v_writelane_b32 v6, s70, 20
; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: v_writelane_b32 v6, s71, 21
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v1, s4
; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: v_writelane_b32 v7, s8, 0
; CHECK-NEXT: v_writelane_b32 v7, s9, 1
; CHECK-NEXT: v_writelane_b32 v7, s10, 2
Expand Down Expand Up @@ -76,28 +76,28 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: v_writelane_b32 v7, s64, 28
; CHECK-NEXT: v_writelane_b32 v7, s65, 29
; CHECK-NEXT: v_writelane_b32 v7, s66, 30
; CHECK-NEXT: v_writelane_b32 v7, s67, 31
; CHECK-NEXT: s_load_dwordx16 s[8:23], s[68:69], 0x1f0
; CHECK-NEXT: s_load_dwordx16 s[36:51], s[68:69], 0x2f0
; CHECK-NEXT: s_mov_b32 s69, s68
; CHECK-NEXT: s_mov_b32 s70, s68
; CHECK-NEXT: s_mov_b32 s71, s68
; CHECK-NEXT: v_mov_b32_e32 v3, v2
; CHECK-NEXT: v_writelane_b32 v7, s67, 31
; CHECK-NEXT: image_sample_lz v1, v[1:2], s[60:67], s[68:71] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s52, v7, 0
; CHECK-NEXT: v_mov_b32_e32 v3, v2
; CHECK-NEXT: v_readlane_b32 s53, v7, 1
; CHECK-NEXT: v_readlane_b32 s54, v7, 2
; CHECK-NEXT: v_readlane_b32 s55, v7, 3
; CHECK-NEXT: v_readlane_b32 s56, v7, 4
; CHECK-NEXT: v_readlane_b32 s57, v7, 5
; CHECK-NEXT: v_readlane_b32 s58, v7, 6
; CHECK-NEXT: v_readlane_b32 s59, v7, 7
; CHECK-NEXT: image_sample_lz v1, v[1:2], s[60:67], s[68:71] dmask:0x1
; CHECK-NEXT: v_and_b32_e32 v5, 1, v0
; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v5
; CHECK-NEXT: v_readlane_b32 s60, v7, 8
; CHECK-NEXT: v_readlane_b32 s61, v7, 9
; CHECK-NEXT: image_sample_lz v4, v[2:3], s[52:59], s[68:71] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s62, v7, 10
; CHECK-NEXT: image_sample_lz v4, v[2:3], s[52:59], s[68:71] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s63, v7, 11
; CHECK-NEXT: v_readlane_b32 s64, v7, 12
; CHECK-NEXT: v_readlane_b32 s65, v7, 13
Expand All @@ -109,6 +109,7 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
; CHECK-NEXT: s_cbranch_execz .LBB0_3
; CHECK-NEXT: ; %bb.1: ; %bb48
; CHECK-NEXT: v_readlane_b32 s52, v7, 16
; CHECK-NEXT: v_readlane_b32 s60, v7, 24
; CHECK-NEXT: v_readlane_b32 s61, v7, 25
; CHECK-NEXT: v_readlane_b32 s62, v7, 26
Expand All @@ -119,11 +120,10 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: v_readlane_b32 s67, v7, 31
; CHECK-NEXT: v_mov_b32_e32 v1, v2
; CHECK-NEXT: s_and_b64 vcc, exec, -1
; CHECK-NEXT: v_readlane_b32 s52, v7, 16
; CHECK-NEXT: v_readlane_b32 s53, v7, 17
; CHECK-NEXT: v_readlane_b32 s54, v7, 18
; CHECK-NEXT: image_sample_lz v3, v[2:3], s[60:67], s[68:71] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s55, v7, 19
; CHECK-NEXT: image_sample_lz v3, v[2:3], s[60:67], s[68:71] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s56, v7, 20
; CHECK-NEXT: v_readlane_b32 s57, v7, 21
; CHECK-NEXT: v_readlane_b32 s58, v7, 22
Expand Down Expand Up @@ -152,25 +152,27 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: s_mov_b32 s16, 0
; CHECK-NEXT: s_mov_b32 s17, s16
; CHECK-NEXT: v_mov_b32_e32 v0, s16
; CHECK-NEXT: v_readlane_b32 s52, v7, 24
; CHECK-NEXT: v_readlane_b32 s53, v7, 25
; CHECK-NEXT: v_readlane_b32 s54, v7, 26
; CHECK-NEXT: v_readlane_b32 s55, v7, 27
; CHECK-NEXT: v_readlane_b32 s56, v7, 28
; CHECK-NEXT: v_readlane_b32 s57, v7, 29
; CHECK-NEXT: v_readlane_b32 s58, v7, 30
; CHECK-NEXT: v_readlane_b32 s59, v7, 31
; CHECK-NEXT: v_readlane_b32 s44, v7, 16
; CHECK-NEXT: v_mov_b32_e32 v1, s17
; CHECK-NEXT: s_mov_b32 s18, s16
; CHECK-NEXT: s_mov_b32 s19, s16
; CHECK-NEXT: v_readlane_b32 s44, v7, 16
; CHECK-NEXT: v_readlane_b32 s45, v7, 17
; CHECK-NEXT: v_readlane_b32 s46, v7, 18
; CHECK-NEXT: v_readlane_b32 s47, v7, 19
; CHECK-NEXT: v_readlane_b32 s48, v7, 20
; CHECK-NEXT: v_readlane_b32 s49, v7, 21
; CHECK-NEXT: v_readlane_b32 s50, v7, 22
; CHECK-NEXT: v_readlane_b32 s51, v7, 23
; CHECK-NEXT: v_readlane_b32 s52, v7, 24
; CHECK-NEXT: v_readlane_b32 s53, v7, 25
; CHECK-NEXT: v_readlane_b32 s54, v7, 26
; CHECK-NEXT: v_readlane_b32 s55, v7, 27
; CHECK-NEXT: v_readlane_b32 s56, v7, 28
; CHECK-NEXT: v_readlane_b32 s57, v7, 29
; CHECK-NEXT: v_readlane_b32 s58, v7, 30
; CHECK-NEXT: v_readlane_b32 s59, v7, 31
; CHECK-NEXT: image_sample_lz v2, v[0:1], s[44:51], s[16:19] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s44, v7, 0
; CHECK-NEXT: v_readlane_b32 s52, v7, 8
; CHECK-NEXT: v_readlane_b32 s53, v7, 9
; CHECK-NEXT: v_readlane_b32 s54, v7, 10
Expand All @@ -179,14 +181,12 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: v_readlane_b32 s57, v7, 13
; CHECK-NEXT: v_readlane_b32 s58, v7, 14
; CHECK-NEXT: v_readlane_b32 s59, v7, 15
; CHECK-NEXT: image_sample_lz v2, v[0:1], s[44:51], s[16:19] dmask:0x1
; CHECK-NEXT: v_mov_b32_e32 v3, 0
; CHECK-NEXT: v_mov_b32_e32 v4, v3
; CHECK-NEXT: v_readlane_b32 s44, v7, 0
; CHECK-NEXT: v_readlane_b32 s45, v7, 1
; CHECK-NEXT: image_sample_lz v0, v[0:1], s[52:59], s[24:27] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s46, v7, 2
; CHECK-NEXT: v_readlane_b32 s47, v7, 3
; CHECK-NEXT: image_sample_lz v0, v[0:1], s[52:59], s[24:27] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s48, v7, 4
; CHECK-NEXT: v_readlane_b32 s49, v7, 5
; CHECK-NEXT: v_readlane_b32 s50, v7, 6
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1826,10 +1826,10 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
; GCN-NEXT: s_or_b32 s0, s0, s5
; GCN-NEXT: s_and_b32 s0, s0, 0xffff
; GCN-NEXT: s_or_b32 s0, s0, s4
; GCN-NEXT: v_mov_b32_e32 v1, s1
; GCN-NEXT: v_readlane_b32 s1, v6, 1
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: v_mov_b32_e32 v1, s1
; GCN-NEXT: v_readlane_b32 s0, v6, 0
; GCN-NEXT: v_readlane_b32 s1, v6, 1
; GCN-NEXT: v_mov_b32_e32 v5, s1
; GCN-NEXT: v_mov_b32_e32 v2, s2
; GCN-NEXT: v_mov_b32_e32 v3, s3
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10334,11 +10334,11 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: v_writelane_b32 v62, s3, 5
; GFX8-NEXT: v_readlane_b32 s2, v62, 2
; GFX8-NEXT: v_readlane_b32 s3, v62, 3
; GFX8-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
; GFX8-NEXT: v_mov_b32_e32 v35, s49
; GFX8-NEXT: s_bfe_i64 s[48:49], s[4:5], 0x10000
; GFX8-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
; GFX8-NEXT: v_readlane_b32 s2, v62, 0
; GFX8-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
; GFX8-NEXT: v_readlane_b32 s3, v62, 1
; GFX8-NEXT: v_mov_b32_e32 v5, s75
; GFX8-NEXT: v_mov_b32_e32 v13, s73
Expand Down Expand Up @@ -10632,8 +10632,8 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: v_mov_b32_e32 v2, s34
; GFX8-NEXT: v_mov_b32_e32 v3, s35
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_readlane_b32 s2, v62, 4
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: v_readlane_b32 s3, v62, 5
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v0, s30
Expand Down
Loading
Loading