diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll index bc2ed12067ada1..26b8b63ad5d6d9 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll @@ -567,16 +567,16 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg ; GFX908-NEXT: s_add_i32 s1, s9, s1 ; GFX908-NEXT: s_lshl_b64 s[0:1], s[0:1], 5 ; GFX908-NEXT: s_branch .LBB3_2 -; GFX908-NEXT: .LBB3_1: ; %Flow20 +; GFX908-NEXT: .LBB3_1: ; %Flow20 ; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1 ; GFX908-NEXT: s_andn2_b64 vcc, exec, s[14:15] ; GFX908-NEXT: s_cbranch_vccz .LBB3_12 -; GFX908-NEXT: .LBB3_2: ; %bb9 +; GFX908-NEXT: .LBB3_2: ; %bb9 ; GFX908-NEXT: ; =>This Loop Header: Depth=1 ; GFX908-NEXT: ; Child Loop BB3_5 Depth 2 ; GFX908-NEXT: s_mov_b64 s[16:17], -1 ; GFX908-NEXT: s_cbranch_scc0 .LBB3_10 -; GFX908-NEXT: ; %bb.3: ; %bb14 +; GFX908-NEXT: ; %bb.3: ; %bb14 ; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1 ; GFX908-NEXT: global_load_dwordx2 v[2:3], v[0:1], off ; GFX908-NEXT: s_mov_b32 s9, s8 @@ -655,6 +655,35 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg ; GFX908-NEXT: v_add_f32_e32 v11, v11, v13 ; GFX908-NEXT: s_mov_b64 s[22:23], -1 ; GFX908-NEXT: s_branch .LBB3_4 +; GFX908-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2 +; GFX908-NEXT: s_mov_b64 s[22:23], s[14:15] +; GFX908-NEXT: s_andn2_b64 vcc, exec, s[22:23] +; GFX908-NEXT: s_cbranch_vccz .LBB3_4 +; GFX908-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1 +; GFX908-NEXT: ; implicit-def: $vgpr10_vgpr11 +; GFX908-NEXT: ; implicit-def: $vgpr6_vgpr7 +; GFX908-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX908-NEXT: ; implicit-def: $vgpr4_vgpr5 +; GFX908-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX908-NEXT: ; implicit-def: $sgpr20_sgpr21 +; GFX908-NEXT: .LBB3_9: ; %loop.exit.guard +; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1 +; GFX908-NEXT: s_xor_b64 s[16:17], s[22:23], -1 +; GFX908-NEXT: .LBB3_10: ; %Flow19 +; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1 +; GFX908-NEXT: s_mov_b64 s[14:15], -1 +; GFX908-NEXT: s_and_b64 vcc, exec, s[16:17] +; GFX908-NEXT: s_cbranch_vccz .LBB3_1 +; GFX908-NEXT: ; %bb.11: ; %bb12 +; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1 +; GFX908-NEXT: s_add_u32 s6, s6, s4 +; GFX908-NEXT: s_addc_u32 s7, s7, 0 +; GFX908-NEXT: s_add_u32 s10, s10, s12 +; GFX908-NEXT: s_addc_u32 s11, s11, s13 +; GFX908-NEXT: s_mov_b64 s[14:15], 0 +; GFX908-NEXT: s_branch .LBB3_1 +; GFX908-NEXT: .LBB3_12: ; %DummyReturnBlock +; GFX908-NEXT: s_endpgm ; ; GFX90A-LABEL: introduced_copy_to_sgpr: ; GFX90A: ; %bb.0: ; %bb @@ -780,26 +809,26 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg ; GFX90A-NEXT: v_pk_add_f32 v[12:13], v[12:13], v[14:15] ; GFX90A-NEXT: s_mov_b64 s[22:23], -1 ; GFX90A-NEXT: s_branch .LBB3_4 -; GFX90A-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2 +; GFX90A-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2 ; GFX90A-NEXT: s_mov_b64 s[22:23], s[14:15] ; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[22:23] ; GFX90A-NEXT: s_cbranch_vccz .LBB3_4 -; GFX90A-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1 +; GFX90A-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1 ; GFX90A-NEXT: ; implicit-def: $vgpr12_vgpr13 ; GFX90A-NEXT: ; implicit-def: $vgpr8_vgpr9 ; GFX90A-NEXT: ; implicit-def: $vgpr10_vgpr11 ; GFX90A-NEXT: ; implicit-def: $vgpr6_vgpr7 ; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5 ; GFX90A-NEXT: ; implicit-def: $sgpr20_sgpr21 -; GFX90A-NEXT: .LBB3_9: ; %loop.exit.guard +; GFX90A-NEXT: .LBB3_9: ; %loop.exit.guard ; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1 ; GFX90A-NEXT: s_xor_b64 s[16:17], s[22:23], -1 -; GFX90A-NEXT: .LBB3_10: ; %Flow19 +; GFX90A-NEXT: .LBB3_10: ; %Flow19 ; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1 ; GFX90A-NEXT: s_mov_b64 s[14:15], -1 ; GFX90A-NEXT: s_and_b64 vcc, exec, s[16:17] ; GFX90A-NEXT: s_cbranch_vccz .LBB3_1 -; GFX90A-NEXT: ; %bb.11: ; %bb12 +; GFX90A-NEXT: ; %bb.11: ; %bb12 ; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1 ; GFX90A-NEXT: s_add_u32 s6, s6, s4 ; GFX90A-NEXT: s_addc_u32 s7, s7, 0 @@ -807,7 +836,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg ; GFX90A-NEXT: s_addc_u32 s11, s11, s13 ; GFX90A-NEXT: s_mov_b64 s[14:15], 0 ; GFX90A-NEXT: s_branch .LBB3_1 -; GFX90A-NEXT: .LBB3_12: ; %DummyReturnBlock +; GFX90A-NEXT: .LBB3_12: ; %DummyReturnBlock ; GFX90A-NEXT: s_endpgm bb: %i = load volatile i16, ptr addrspace(4) undef, align 2