diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add_shl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add_shl.ll index b68df4fbbbb9e..59036338eaf15 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/add_shl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add_shl.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=fiji < %s | FileCheck -check-prefix=VI %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=fiji < %s | FileCheck -check-prefix=VI %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s ; =================================================================================== ; V_ADD_LSHL_U32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.i1.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.i1.ll index 74422a1962344..25d70002a7a8e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.i1.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.i1.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefix=WAVE64 %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=WAVE32 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefix=WAVE64 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=WAVE32 %s define i32 @s_andn2_i1_vcc(i32 %arg0, i32 %arg1) { ; WAVE64-LABEL: s_andn2_i1_vcc: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll index cdcc3a4f27071..fae3a75101ee5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 < %s | FileCheck %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 < %s | FileCheck %s define hidden <2 x i64> @icmp_v2i32_sext_to_v2i64(<2 x i32> %arg) { ; CHECK-LABEL: icmp_v2i32_sext_to_v2i64: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/br-constant-invalid-sgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/br-constant-invalid-sgpr-copy.ll index 439ffbac960b8..22324e62c2ab5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/br-constant-invalid-sgpr-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/br-constant-invalid-sgpr-copy.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=WAVE64 %s -; RUN: llc -global-isel -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 < %s | FileCheck -check-prefix=WAVE32 %s +; RUN: llc -global-isel -new-reg-bank-select -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=WAVE64 %s +; RUN: llc -global-isel -new-reg-bank-select -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 < %s | FileCheck -check-prefix=WAVE32 %s ; This was mishandling the constant true and false values used as a ; scalar branch condition. @@ -76,7 +76,8 @@ define void @br_undef() { ; WAVE64-NEXT: .LBB2_1: ; %bb0 ; WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1 ; WAVE64-NEXT: ; implicit-def: $sgpr4 -; WAVE64-NEXT: s_and_b32 s4, s4, 1 +; WAVE64-NEXT: s_mov_b32 s5, 1 +; WAVE64-NEXT: s_and_b32 s4, s4, s5 ; WAVE64-NEXT: s_cmp_lg_u32 s4, 0 ; WAVE64-NEXT: s_cbranch_scc1 .LBB2_1 ; WAVE64-NEXT: ; %bb.2: ; %.exit5 @@ -88,7 +89,8 @@ define void @br_undef() { ; WAVE32-NEXT: .LBB2_1: ; %bb0 ; WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1 ; WAVE32-NEXT: ; implicit-def: $sgpr4 -; WAVE32-NEXT: s_and_b32 s4, s4, 1 +; WAVE32-NEXT: s_mov_b32 s5, 1 +; WAVE32-NEXT: s_and_b32 s4, s4, s5 ; WAVE32-NEXT: s_cmp_lg_u32 s4, 0 ; WAVE32-NEXT: s_cbranch_scc1 .LBB2_1 ; WAVE32-NEXT: ; %bb.2: ; %.exit5 @@ -110,7 +112,8 @@ define void @br_poison() { ; WAVE64-NEXT: .LBB3_1: ; %bb0 ; WAVE64-NEXT: ; =>This Inner Loop Header: Depth=1 ; WAVE64-NEXT: ; implicit-def: $sgpr4 -; WAVE64-NEXT: s_and_b32 s4, s4, 1 +; WAVE64-NEXT: s_mov_b32 s5, 1 +; WAVE64-NEXT: s_and_b32 s4, s4, s5 ; WAVE64-NEXT: s_cmp_lg_u32 s4, 0 ; WAVE64-NEXT: s_cbranch_scc1 .LBB3_1 ; WAVE64-NEXT: ; %bb.2: ; %.exit5 @@ -122,7 +125,8 @@ define void @br_poison() { ; WAVE32-NEXT: .LBB3_1: ; %bb0 ; WAVE32-NEXT: ; =>This Inner Loop Header: Depth=1 ; WAVE32-NEXT: ; implicit-def: $sgpr4 -; WAVE32-NEXT: s_and_b32 s4, s4, 1 +; WAVE32-NEXT: s_mov_b32 s5, 1 +; WAVE32-NEXT: s_and_b32 s4, s4, s5 ; WAVE32-NEXT: s_cmp_lg_u32 s4, 0 ; WAVE32-NEXT: s_cbranch_scc1 .LBB3_1 ; WAVE32-NEXT: ; %bb.2: ; %.exit5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain.ll index 2d3088f3edb72..917cdb3f49a26 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn < %s | FileCheck %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn < %s | FileCheck %s define amdgpu_cs i32 @test_shl_1(i32 inreg %arg1) { ; CHECK-LABEL: test_shl_1: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic.ll index 5532443c0dfc8..914a26b2fb525 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn < %s | FileCheck %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn < %s | FileCheck %s define amdgpu_cs i32 @test_shl_and_1(i32 inreg %arg1) { ; CHECK-LABEL: test_shl_and_1: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll index a8a75cd2ffaa8..dd01112d97a18 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; This file contains various tests that have divergent i1s used outside of ; the loop. These are lane masks is sgpr and need to have correct value in @@ -13,30 +13,27 @@ define void @divergent_i1_phi_used_outside_loop(float %val, float %pre.cond.val, ; GFX10-LABEL: divergent_i1_phi_used_outside_loop: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: v_cmp_lt_f32_e64 s5, 1.0, v1 -; GFX10-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-NEXT: ; implicit-def: $sgpr6 +; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: s_mov_b32 s6, 0 ; GFX10-NEXT: ; implicit-def: $sgpr7 ; GFX10-NEXT: .LBB0_1: ; %loop ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_cvt_f32_u32_e32 v4, v1 -; GFX10-NEXT: s_xor_b32 s8, s5, -1 -; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1 -; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v0 +; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s6 +; GFX10-NEXT: s_mov_b32 s8, exec_lo +; GFX10-NEXT: s_add_i32 s6, s6, 1 +; GFX10-NEXT: s_xor_b32 s8, s5, s8 +; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v0 ; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX10-NEXT: s_andn2_b32 s7, s7, exec_lo -; GFX10-NEXT: s_and_b32 s5, exec_lo, s5 -; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo -; GFX10-NEXT: s_or_b32 s7, s7, s5 +; GFX10-NEXT: s_and_b32 s9, exec_lo, s5 ; GFX10-NEXT: s_mov_b32 s5, s8 -; GFX10-NEXT: s_and_b32 s9, exec_lo, s7 -; GFX10-NEXT: s_or_b32 s6, s6, s9 +; GFX10-NEXT: s_or_b32 s7, s7, s9 ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB0_1 ; GFX10-NEXT: ; %bb.2: ; %exit ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s7 ; GFX10-NEXT: flat_store_dword v[2:3], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -63,43 +60,44 @@ define void @divergent_i1_phi_used_outside_loop_larger_loop_body(float %val, ptr ; GFX10-LABEL: divergent_i1_phi_used_outside_loop_larger_loop_body: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, -1 -; GFX10-NEXT: ; implicit-def: $sgpr6 -; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: s_andn2_b32 s5, s4, exec_lo -; GFX10-NEXT: s_and_b32 s4, exec_lo, -1 -; GFX10-NEXT: s_or_b32 s4, s5, s4 +; GFX10-NEXT: s_and_b32 s6, exec_lo, exec_lo +; GFX10-NEXT: s_mov_b32 s4, -1 +; GFX10-NEXT: s_or_b32 s7, s5, s6 +; GFX10-NEXT: ; implicit-def: $sgpr5 ; GFX10-NEXT: s_branch .LBB1_2 ; GFX10-NEXT: .LBB1_1: ; %loop.cond ; GFX10-NEXT: ; in Loop: Header=BB1_2 Depth=1 -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX10-NEXT: v_add_co_u32 v1, s4, v1, 4 -; GFX10-NEXT: v_add_nc_u32_e32 v0, 1, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s4, 0, v2, s4 -; GFX10-NEXT: s_andn2_b32 s7, s5, exec_lo -; GFX10-NEXT: s_and_b32 s8, exec_lo, s6 -; GFX10-NEXT: v_cmp_le_i32_e32 vcc_lo, 10, v0 -; GFX10-NEXT: s_or_b32 s4, s7, s8 -; GFX10-NEXT: s_cbranch_vccz .LBB1_4 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s7 +; GFX10-NEXT: s_add_i32 s4, s4, 1 +; GFX10-NEXT: v_add_co_u32 v1, vcc_lo, v1, 4 +; GFX10-NEXT: s_cmp_ge_i32 s4, 10 +; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v2, vcc_lo +; GFX10-NEXT: s_cselect_b32 s8, 1, 0 +; GFX10-NEXT: s_andn2_b32 s7, s6, exec_lo +; GFX10-NEXT: s_and_b32 s9, exec_lo, s5 +; GFX10-NEXT: s_or_b32 s7, s7, s9 +; GFX10-NEXT: s_cmp_lg_u32 s8, 0 +; GFX10-NEXT: s_cbranch_scc0 .LBB1_4 ; GFX10-NEXT: .LBB1_2: ; %loop.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: s_mov_b32 s5, s4 -; GFX10-NEXT: s_andn2_b32 s4, s6, exec_lo -; GFX10-NEXT: s_and_b32 s6, exec_lo, s5 -; GFX10-NEXT: s_or_b32 s6, s4, s6 -; GFX10-NEXT: s_and_saveexec_b32 s4, s5 +; GFX10-NEXT: s_mov_b32 s6, s7 +; GFX10-NEXT: s_andn2_b32 s5, s5, exec_lo +; GFX10-NEXT: s_and_b32 s7, exec_lo, s7 +; GFX10-NEXT: s_or_b32 s5, s5, s7 +; GFX10-NEXT: s_and_saveexec_b32 s7, s6 ; GFX10-NEXT: s_cbranch_execz .LBB1_1 ; GFX10-NEXT: ; %bb.3: ; %is.eq.zero ; GFX10-NEXT: ; in Loop: Header=BB1_2 Depth=1 -; GFX10-NEXT: global_load_dword v5, v[1:2], off -; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo +; GFX10-NEXT: global_load_dword v0, v[1:2], off +; GFX10-NEXT: s_andn2_b32 s5, s5, exec_lo ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5 -; GFX10-NEXT: s_and_b32 s7, exec_lo, vcc_lo -; GFX10-NEXT: s_or_b32 s6, s6, s7 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-NEXT: s_and_b32 s8, exec_lo, vcc_lo +; GFX10-NEXT: s_or_b32 s5, s5, s8 ; GFX10-NEXT: s_branch .LBB1_1 ; GFX10-NEXT: .LBB1_4: ; %exit -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s6 ; GFX10-NEXT: flat_store_dword v[3:4], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -135,29 +133,26 @@ define void @divergent_i1_xor_used_outside_loop(float %val, float %pre.cond.val, ; GFX10-LABEL: divergent_i1_xor_used_outside_loop: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: v_cmp_lt_f32_e64 s5, 1.0, v1 -; GFX10-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-NEXT: ; implicit-def: $sgpr6 +; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: s_mov_b32 s6, 0 ; GFX10-NEXT: ; implicit-def: $sgpr7 ; GFX10-NEXT: .LBB2_1: ; %loop ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_cvt_f32_u32_e32 v4, v1 -; GFX10-NEXT: s_xor_b32 s5, s5, -1 -; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1 -; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v0 +; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s6 +; GFX10-NEXT: s_mov_b32 s8, exec_lo +; GFX10-NEXT: s_add_i32 s6, s6, 1 +; GFX10-NEXT: s_xor_b32 s5, s5, s8 +; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v0 ; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX10-NEXT: s_andn2_b32 s7, s7, exec_lo ; GFX10-NEXT: s_and_b32 s8, exec_lo, s5 -; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo ; GFX10-NEXT: s_or_b32 s7, s7, s8 -; GFX10-NEXT: s_and_b32 s8, exec_lo, s7 -; GFX10-NEXT: s_or_b32 s6, s6, s8 ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB2_1 ; GFX10-NEXT: ; %bb.2: ; %exit ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s6 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s7 ; GFX10-NEXT: flat_store_dword v[2:3], v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -184,23 +179,20 @@ define void @divergent_i1_xor_used_outside_loop_twice(float %val, float %pre.con ; GFX10-LABEL: divergent_i1_xor_used_outside_loop_twice: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: v_cmp_lt_f32_e64 s5, 1.0, v1 -; GFX10-NEXT: v_mov_b32_e32 v1, s4 +; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: s_mov_b32 s7, 0 ; GFX10-NEXT: ; implicit-def: $sgpr6 -; GFX10-NEXT: ; implicit-def: $sgpr7 ; GFX10-NEXT: .LBB3_1: ; %loop ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_cvt_f32_u32_e32 v6, v1 -; GFX10-NEXT: s_xor_b32 s5, s5, -1 -; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1 -; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v6, v0 +; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s7 +; GFX10-NEXT: s_mov_b32 s8, exec_lo +; GFX10-NEXT: s_add_i32 s7, s7, 1 +; GFX10-NEXT: s_xor_b32 s5, s5, s8 +; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v0 ; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_andn2_b32 s7, s7, exec_lo -; GFX10-NEXT: s_and_b32 s8, exec_lo, s5 ; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo -; GFX10-NEXT: s_or_b32 s7, s7, s8 -; GFX10-NEXT: s_and_b32 s8, exec_lo, s7 +; GFX10-NEXT: s_and_b32 s8, exec_lo, s5 ; GFX10-NEXT: s_or_b32 s6, s6, s8 ; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB3_1 @@ -247,66 +239,64 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts, ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: s_mov_b32 s6, -1 -; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo +; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_and_saveexec_b32 s7, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB4_6 ; GFX10-NEXT: ; %bb.1: ; %loop.start.preheader -; GFX10-NEXT: v_mov_b32_e32 v5, s5 -; GFX10-NEXT: ; implicit-def: $sgpr6 -; GFX10-NEXT: ; implicit-def: $sgpr8 +; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: ; implicit-def: $sgpr10 +; GFX10-NEXT: ; implicit-def: $sgpr11 ; GFX10-NEXT: ; implicit-def: $sgpr9 -; GFX10-NEXT: ; implicit-def: $sgpr7 ; GFX10-NEXT: s_branch .LBB4_3 ; GFX10-NEXT: .LBB4_2: ; %Flow ; GFX10-NEXT: ; in Loop: Header=BB4_3 Depth=1 -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s10 -; GFX10-NEXT: s_xor_b32 s10, s9, -1 -; GFX10-NEXT: s_and_b32 s11, exec_lo, s8 -; GFX10-NEXT: s_or_b32 s5, s11, s5 -; GFX10-NEXT: s_andn2_b32 s7, s7, exec_lo -; GFX10-NEXT: s_and_b32 s10, exec_lo, s10 -; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo -; GFX10-NEXT: s_or_b32 s7, s7, s10 -; GFX10-NEXT: s_and_b32 s10, exec_lo, s7 -; GFX10-NEXT: s_or_b32 s6, s6, s10 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_xor_b32 s5, s11, exec_lo +; GFX10-NEXT: s_and_b32 s12, exec_lo, s10 +; GFX10-NEXT: s_or_b32 s8, s12, s8 +; GFX10-NEXT: s_andn2_b32 s9, s9, exec_lo +; GFX10-NEXT: s_and_b32 s5, exec_lo, s5 +; GFX10-NEXT: s_or_b32 s9, s9, s5 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: s_cbranch_execz .LBB4_5 ; GFX10-NEXT: .LBB4_3: ; %loop.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5 -; GFX10-NEXT: s_andn2_b32 s9, s9, exec_lo -; GFX10-NEXT: s_and_b32 s10, exec_lo, -1 -; GFX10-NEXT: s_andn2_b32 s8, s8, exec_lo -; GFX10-NEXT: s_or_b32 s9, s9, s10 -; GFX10-NEXT: v_lshlrev_b64 v[6:7], 2, v[5:6] -; GFX10-NEXT: s_or_b32 s8, s8, s10 -; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v1, v6 -; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v2, v7, vcc_lo -; GFX10-NEXT: global_load_dword v6, v[6:7], off +; GFX10-NEXT: s_ashr_i32 s5, s4, 31 +; GFX10-NEXT: s_andn2_b32 s10, s10, exec_lo +; GFX10-NEXT: s_lshl_b64 s[12:13], s[4:5], 2 +; GFX10-NEXT: s_andn2_b32 s5, s11, exec_lo +; GFX10-NEXT: v_mov_b32_e32 v5, s12 +; GFX10-NEXT: v_mov_b32_e32 v6, s13 +; GFX10-NEXT: s_and_b32 s11, exec_lo, exec_lo +; GFX10-NEXT: s_and_b32 s12, exec_lo, exec_lo +; GFX10-NEXT: s_or_b32 s11, s5, s11 +; GFX10-NEXT: v_add_co_u32 v5, vcc_lo, v1, v5 +; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, v2, v6, vcc_lo +; GFX10-NEXT: s_or_b32 s10, s10, s12 +; GFX10-NEXT: global_load_dword v5, v[5:6], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 -; GFX10-NEXT: s_and_saveexec_b32 s10, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v5 +; GFX10-NEXT: s_and_saveexec_b32 s5, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB4_2 ; GFX10-NEXT: ; %bb.4: ; %loop.cond ; GFX10-NEXT: ; in Loop: Header=BB4_3 Depth=1 -; GFX10-NEXT: v_add_nc_u32_e32 v6, 1, v5 -; GFX10-NEXT: v_cmp_lt_i32_e32 vcc_lo, v5, v0 -; GFX10-NEXT: s_andn2_b32 s9, s9, exec_lo -; GFX10-NEXT: s_and_b32 s11, exec_lo, 0 -; GFX10-NEXT: s_andn2_b32 s8, s8, exec_lo -; GFX10-NEXT: v_mov_b32_e32 v5, v6 -; GFX10-NEXT: s_and_b32 s12, exec_lo, vcc_lo -; GFX10-NEXT: s_or_b32 s9, s9, s11 -; GFX10-NEXT: s_or_b32 s8, s8, s12 +; GFX10-NEXT: v_cmp_lt_i32_e32 vcc_lo, s4, v0 +; GFX10-NEXT: s_andn2_b32 s11, s11, exec_lo +; GFX10-NEXT: s_and_b32 s12, exec_lo, 0 +; GFX10-NEXT: s_andn2_b32 s10, s10, exec_lo +; GFX10-NEXT: s_add_i32 s4, s4, 1 +; GFX10-NEXT: s_and_b32 s13, exec_lo, vcc_lo +; GFX10-NEXT: s_or_b32 s11, s11, s12 +; GFX10-NEXT: s_or_b32 s10, s10, s13 ; GFX10-NEXT: s_branch .LBB4_2 ; GFX10-NEXT: .LBB4_5: ; %loop.exit.guard -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 -; GFX10-NEXT: s_andn2_b32 s5, -1, exec_lo -; GFX10-NEXT: s_and_b32 s6, exec_lo, s6 -; GFX10-NEXT: s_or_b32 s6, s5, s6 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_andn2_b32 s4, s6, exec_lo +; GFX10-NEXT: s_and_b32 s5, exec_lo, s9 +; GFX10-NEXT: s_or_b32 s6, s4, s5 ; GFX10-NEXT: .LBB4_6: ; %Flow1 -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s7 ; GFX10-NEXT: s_and_saveexec_b32 s4, s6 ; GFX10-NEXT: s_cbranch_execz .LBB4_8 ; GFX10-NEXT: ; %bb.7: ; %block.after.loop @@ -355,56 +345,54 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace ; GFX10-LABEL: divergent_i1_icmp_used_outside_loop: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_mov_b32 s6, 0 ; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: ; implicit-def: $sgpr5 -; GFX10-NEXT: ; implicit-def: $sgpr6 -; GFX10-NEXT: v_mov_b32_e32 v5, s4 +; GFX10-NEXT: ; implicit-def: $sgpr7 ; GFX10-NEXT: s_branch .LBB5_2 ; GFX10-NEXT: .LBB5_1: ; %Flow ; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1 ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 -; GFX10-NEXT: s_and_b32 s7, exec_lo, s7 -; GFX10-NEXT: s_or_b32 s4, s7, s4 -; GFX10-NEXT: s_andn2_b32 s5, s5, exec_lo -; GFX10-NEXT: s_and_b32 s7, exec_lo, s6 -; GFX10-NEXT: s_or_b32 s5, s5, s7 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: s_and_b32 s5, exec_lo, s5 +; GFX10-NEXT: s_or_b32 s6, s5, s6 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s6 ; GFX10-NEXT: s_cbranch_execz .LBB5_6 ; GFX10-NEXT: .LBB5_2: ; %cond.block.0 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_mov_b32_e32 v4, v5 -; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v4 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s4, v0 +; GFX10-NEXT: v_mov_b32_e32 v4, s4 +; GFX10-NEXT: s_andn2_b32 s5, s7, exec_lo ; GFX10-NEXT: s_and_b32 s7, exec_lo, vcc_lo -; GFX10-NEXT: s_or_b32 s6, s6, s7 -; GFX10-NEXT: s_and_saveexec_b32 s7, vcc_lo +; GFX10-NEXT: s_or_b32 s7, s5, s7 +; GFX10-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB5_4 ; GFX10-NEXT: ; %bb.3: ; %if.block.0 ; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1 -; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4 -; GFX10-NEXT: v_lshlrev_b64 v[8:9], 2, v[4:5] +; GFX10-NEXT: s_ashr_i32 s5, s4, 31 +; GFX10-NEXT: v_mov_b32_e32 v5, s4 +; GFX10-NEXT: s_lshl_b64 s[10:11], s[4:5], 2 +; GFX10-NEXT: v_mov_b32_e32 v8, s10 +; GFX10-NEXT: v_mov_b32_e32 v9, s11 ; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v2, v8 ; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v3, v9, vcc_lo -; GFX10-NEXT: global_store_dword v[8:9], v4, off +; GFX10-NEXT: global_store_dword v[8:9], v5, off ; GFX10-NEXT: .LBB5_4: ; %loop.break.block ; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s7 -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v4 -; GFX10-NEXT: s_mov_b32 s7, -1 -; GFX10-NEXT: ; implicit-def: $vgpr5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, s4, v1 +; GFX10-NEXT: s_mov_b32 s5, exec_lo ; GFX10-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB5_1 ; GFX10-NEXT: ; %bb.5: ; %loop.cond ; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1 -; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v4 -; GFX10-NEXT: s_andn2_b32 s7, -1, exec_lo +; GFX10-NEXT: s_andn2_b32 s5, s5, exec_lo ; GFX10-NEXT: s_and_b32 s9, exec_lo, 0 -; GFX10-NEXT: s_or_b32 s7, s7, s9 +; GFX10-NEXT: s_add_i32 s4, s4, 1 +; GFX10-NEXT: s_or_b32 s5, s5, s9 ; GFX10-NEXT: s_branch .LBB5_1 ; GFX10-NEXT: .LBB5_6: ; %cond.block.1 -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s6 +; GFX10-NEXT: s_and_saveexec_b32 s4, s7 ; GFX10-NEXT: s_cbranch_execz .LBB5_8 ; GFX10-NEXT: ; %bb.7: ; %if.block.1 ; GFX10-NEXT: global_store_dword v[6:7], v4, off @@ -468,52 +456,50 @@ exit: define amdgpu_ps void @divergent_i1_freeze_used_outside_loop(i32 %n, ptr addrspace(1) %a, ptr %addr) { ; GFX10-LABEL: divergent_i1_freeze_used_outside_loop: ; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_mov_b32 s1, exec_lo +; GFX10-NEXT: s_mov_b32 s2, 0 ; GFX10-NEXT: s_mov_b32 s0, 0 -; GFX10-NEXT: s_mov_b32 s4, -1 -; GFX10-NEXT: v_mov_b32_e32 v5, s0 +; GFX10-NEXT: ; implicit-def: $sgpr4 ; GFX10-NEXT: ; implicit-def: $sgpr3 -; GFX10-NEXT: ; implicit-def: $sgpr1 -; GFX10-NEXT: ; implicit-def: $sgpr2 ; GFX10-NEXT: s_branch .LBB6_2 ; GFX10-NEXT: .LBB6_1: ; %loop.cond ; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1 ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 -; GFX10-NEXT: v_cmp_lt_i32_e32 vcc_lo, v5, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v5 -; GFX10-NEXT: s_or_b32 s0, vcc_lo, s0 -; GFX10-NEXT: s_andn2_b32 s2, s2, exec_lo -; GFX10-NEXT: s_and_b32 s5, exec_lo, s1 -; GFX10-NEXT: s_andn2_b32 s4, s4, exec_lo -; GFX10-NEXT: s_or_b32 s2, s2, s5 +; GFX10-NEXT: v_cmp_lt_i32_e32 vcc_lo, s0, v0 +; GFX10-NEXT: s_add_i32 s0, s0, 1 +; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2 ; GFX10-NEXT: s_andn2_b32 s3, s3, exec_lo -; GFX10-NEXT: s_and_b32 s6, exec_lo, s2 -; GFX10-NEXT: s_or_b32 s4, s4, s5 -; GFX10-NEXT: s_or_b32 s3, s3, s6 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 +; GFX10-NEXT: s_and_b32 s5, exec_lo, s4 +; GFX10-NEXT: s_andn2_b32 s1, s1, exec_lo +; GFX10-NEXT: s_or_b32 s3, s3, s5 +; GFX10-NEXT: s_or_b32 s1, s1, s5 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_execz .LBB6_4 ; GFX10-NEXT: .LBB6_2: ; %loop.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: s_andn2_b32 s1, s1, exec_lo -; GFX10-NEXT: s_and_b32 s5, exec_lo, s4 -; GFX10-NEXT: s_or_b32 s1, s1, s5 -; GFX10-NEXT: s_and_saveexec_b32 s5, s4 +; GFX10-NEXT: s_andn2_b32 s4, s4, exec_lo +; GFX10-NEXT: s_and_b32 s5, exec_lo, s1 +; GFX10-NEXT: s_or_b32 s4, s4, s5 +; GFX10-NEXT: s_and_saveexec_b32 s5, s1 ; GFX10-NEXT: s_cbranch_execz .LBB6_1 ; GFX10-NEXT: ; %bb.3: ; %is.eq.zero ; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1 -; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5 -; GFX10-NEXT: s_andn2_b32 s1, s1, exec_lo -; GFX10-NEXT: v_lshlrev_b64 v[6:7], 2, v[5:6] -; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v1, v6 -; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v2, v7, vcc_lo -; GFX10-NEXT: global_load_dword v6, v[6:7], off +; GFX10-NEXT: s_ashr_i32 s1, s0, 31 +; GFX10-NEXT: s_lshl_b64 s[6:7], s[0:1], 2 +; GFX10-NEXT: s_andn2_b32 s1, s4, exec_lo +; GFX10-NEXT: v_mov_b32_e32 v5, s6 +; GFX10-NEXT: v_mov_b32_e32 v6, s7 +; GFX10-NEXT: v_add_co_u32 v5, vcc_lo, v1, v5 +; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, v2, v6, vcc_lo +; GFX10-NEXT: global_load_dword v5, v[5:6], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v6 +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5 ; GFX10-NEXT: s_and_b32 s4, exec_lo, vcc_lo -; GFX10-NEXT: s_or_b32 s1, s1, s4 -; GFX10-NEXT: ; implicit-def: $sgpr4 +; GFX10-NEXT: s_or_b32 s4, s1, s4 +; GFX10-NEXT: ; implicit-def: $sgpr1 ; GFX10-NEXT: s_branch .LBB6_1 ; GFX10-NEXT: .LBB6_4: ; %exit -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s3 ; GFX10-NEXT: flat_store_dword v[3:4], v0 ; GFX10-NEXT: s_endpgm @@ -548,64 +534,67 @@ exit: define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %a.break) { ; GFX10-LABEL: loop_with_1break: ; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: s_mov_b32 s0, 0 -; GFX10-NEXT: ; implicit-def: $sgpr1 -; GFX10-NEXT: ; implicit-def: $sgpr3 -; GFX10-NEXT: ; implicit-def: $sgpr4 -; GFX10-NEXT: ; implicit-def: $sgpr2 -; GFX10-NEXT: v_mov_b32_e32 v6, s0 +; GFX10-NEXT: ; implicit-def: $sgpr6 +; GFX10-NEXT: ; implicit-def: $sgpr7 +; GFX10-NEXT: ; implicit-def: $sgpr5 ; GFX10-NEXT: s_branch .LBB7_2 ; GFX10-NEXT: .LBB7_1: ; %Flow ; GFX10-NEXT: ; in Loop: Header=BB7_2 Depth=1 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 -; GFX10-NEXT: s_and_b32 s5, exec_lo, s3 -; GFX10-NEXT: s_or_b32 s0, s5, s0 -; GFX10-NEXT: s_andn2_b32 s2, s2, exec_lo -; GFX10-NEXT: s_and_b32 s5, exec_lo, s4 -; GFX10-NEXT: s_andn2_b32 s1, s1, exec_lo -; GFX10-NEXT: s_or_b32 s2, s2, s5 -; GFX10-NEXT: s_and_b32 s5, exec_lo, s2 -; GFX10-NEXT: s_or_b32 s1, s1, s5 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX10-NEXT: s_and_b32 s1, exec_lo, s6 +; GFX10-NEXT: s_or_b32 s4, s1, s4 +; GFX10-NEXT: s_andn2_b32 s1, s5, exec_lo +; GFX10-NEXT: s_and_b32 s2, exec_lo, s7 +; GFX10-NEXT: s_or_b32 s5, s1, s2 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execz .LBB7_4 ; GFX10-NEXT: .LBB7_2: ; %A ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_ashrrev_i32_e32 v7, 31, v6 -; GFX10-NEXT: s_andn2_b32 s4, s4, exec_lo -; GFX10-NEXT: s_and_b32 s5, exec_lo, -1 -; GFX10-NEXT: s_andn2_b32 s3, s3, exec_lo -; GFX10-NEXT: s_or_b32 s4, s4, s5 -; GFX10-NEXT: v_lshlrev_b64 v[7:8], 2, v[6:7] -; GFX10-NEXT: s_or_b32 s3, s3, s5 -; GFX10-NEXT: v_add_co_u32 v9, vcc_lo, v2, v7 -; GFX10-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo -; GFX10-NEXT: global_load_dword v9, v[9:10], off +; GFX10-NEXT: s_ashr_i32 s1, s0, 31 +; GFX10-NEXT: s_mov_b32 s8, exec_lo +; GFX10-NEXT: s_lshl_b64 s[2:3], s[0:1], 2 +; GFX10-NEXT: s_andn2_b32 s1, s7, exec_lo +; GFX10-NEXT: v_mov_b32_e32 v7, s3 +; GFX10-NEXT: v_mov_b32_e32 v6, s2 +; GFX10-NEXT: s_and_b32 s7, exec_lo, s8 +; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo +; GFX10-NEXT: s_and_b32 s8, exec_lo, exec_lo +; GFX10-NEXT: s_or_b32 s7, s1, s7 +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v2, v6 +; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v3, v7, vcc_lo +; GFX10-NEXT: s_or_b32 s6, s6, s8 +; GFX10-NEXT: global_load_dword v6, v[6:7], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 -; GFX10-NEXT: s_and_saveexec_b32 s5, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 +; GFX10-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX10-NEXT: s_cbranch_execz .LBB7_1 ; GFX10-NEXT: ; %bb.3: ; %loop.body ; GFX10-NEXT: ; in Loop: Header=BB7_2 Depth=1 -; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v0, v7 -; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo -; GFX10-NEXT: v_add_nc_u32_e32 v10, 1, v6 -; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x64, v6 -; GFX10-NEXT: s_andn2_b32 s4, s4, exec_lo -; GFX10-NEXT: global_load_dword v9, v[7:8], off -; GFX10-NEXT: s_and_b32 s6, exec_lo, 0 -; GFX10-NEXT: v_mov_b32_e32 v6, v10 -; GFX10-NEXT: s_andn2_b32 s3, s3, exec_lo -; GFX10-NEXT: s_and_b32 s7, exec_lo, vcc_lo -; GFX10-NEXT: s_or_b32 s4, s4, s6 -; GFX10-NEXT: s_or_b32 s3, s3, s7 +; GFX10-NEXT: v_mov_b32_e32 v7, s3 +; GFX10-NEXT: v_mov_b32_e32 v6, s2 +; GFX10-NEXT: s_add_i32 s2, s0, 1 +; GFX10-NEXT: s_cmpk_lt_u32 s0, 0x64 +; GFX10-NEXT: s_cselect_b32 s0, exec_lo, 0 +; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v0, v6 +; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v1, v7, vcc_lo +; GFX10-NEXT: s_andn2_b32 s3, s7, exec_lo +; GFX10-NEXT: s_and_b32 s7, exec_lo, 0 +; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo +; GFX10-NEXT: global_load_dword v8, v[6:7], off +; GFX10-NEXT: s_and_b32 s0, exec_lo, s0 +; GFX10-NEXT: s_or_b32 s7, s3, s7 +; GFX10-NEXT: s_or_b32 s6, s6, s0 +; GFX10-NEXT: s_mov_b32 s0, s2 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_add_nc_u32_e32 v9, 1, v9 -; GFX10-NEXT: global_store_dword v[7:8], v9, off +; GFX10-NEXT: v_add_nc_u32_e32 v8, 1, v8 +; GFX10-NEXT: global_store_dword v[6:7], v8, off ; GFX10-NEXT: s_branch .LBB7_1 ; GFX10-NEXT: .LBB7_4: ; %loop.exit.guard -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX10-NEXT: s_and_saveexec_b32 s0, s1 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: s_and_saveexec_b32 s0, s5 ; GFX10-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX10-NEXT: s_cbranch_execz .LBB7_6 ; GFX10-NEXT: ; %bb.5: ; %break.body diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll index 4fdb4082346af..b520ce1826ec9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mattr=+enable-flat-scratch -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=MESA %s -; RUN: llc -global-isel -mattr=+enable-flat-scratch -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=PAL %s +; RUN: llc -global-isel -new-reg-bank-select -mattr=+enable-flat-scratch -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=MESA %s +; RUN: llc -global-isel -new-reg-bank-select -mattr=+enable-flat-scratch -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=PAL %s ; Test that the initialization for flat_scratch doesn't crash. PAL ; doesn't add a user SGPR for initializing flat_scratch, mesa does diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll index 85c1d3a8c3ee4..390f62d8193f7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn--amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) { ; GFX10-LABEL: test_wave32: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll index ce8cba266aeec..67a388e29087e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn--amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) { ; GFX10-LABEL: test_wave32: @@ -10,9 +10,8 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) { ; GFX10-NEXT: s_load_dword s1, s[8:9], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_cmp_eq_u32 s0, 0 -; GFX10-NEXT: s_cselect_b32 s0, 1, 0 -; GFX10-NEXT: s_and_b32 s0, 1, s0 -; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 +; GFX10-NEXT: s_cselect_b32 s0, exec_lo, 0 +; GFX10-NEXT: s_and_b32 s0, exec_lo, s0 ; GFX10-NEXT: s_or_b32 s0, s0, s1 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: global_store_dword v[0:1], v0, off @@ -26,9 +25,8 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) { ; GFX11-NEXT: s_load_b32 s1, s[4:5], 0x24 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_cmp_eq_u32 s0, 0 -; GFX11-NEXT: s_cselect_b32 s0, 1, 0 -; GFX11-NEXT: s_and_b32 s0, 1, s0 -; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 +; GFX11-NEXT: s_cselect_b32 s0, exec_lo, 0 +; GFX11-NEXT: s_and_b32 s0, exec_lo, s0 ; GFX11-NEXT: s_or_b32 s0, s0, s1 ; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.ptr.buffer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.ptr.buffer.ll index 8d9f9d107b925..4687b83c744cc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.ptr.buffer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.ptr.buffer.ll @@ -1,4 +1,4 @@ -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-mesa-mesa3d < %s | FileCheck -check-prefix=GCN %s ; FIXME: Dropped parts from original test diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.inline.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.inline.ll index 7c0484bf3f317..1ba2558a49ad4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.inline.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.inline.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mem-intrinsic-expand-size=3 %s -o - | FileCheck -check-prefix=GCN %s -; RUN: llc -global-isel -mtriple=amdgcn -mem-intrinsic-expand-size=5 %s -o - | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mem-intrinsic-expand-size=3 %s -o - | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mem-intrinsic-expand-size=5 %s -o - | FileCheck -check-prefix=GCN %s declare void @llvm.memcpy.inline.p1.p1.i32(ptr addrspace(1), ptr addrspace(1), i32, i1 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s32.mir index 45332c2870c02..4fdc8e435c23d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s32.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s --- name: add_s32_ss diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir index 04cdf2e9fce73..17fc2d0c23771 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" -o - %s | FileCheck %s --- name: readfirstlane_s diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.getpc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.getpc.mir index 9650da855ba5a..cba0db7163eeb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.getpc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.getpc.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" -o - %s | FileCheck %s --- name: getpc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitcast.mir index 550f042618abf..66bdf4193010b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-bitcast.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s --- name: bitcast_s diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir index 3b2b141539fc5..cd957c8b9c464 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-brcond.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s --- name: brcond_vcc_cond @@ -40,9 +39,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ICMP]], [[C]] + ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: bb.0.entry: @@ -66,9 +65,9 @@ body: | ; CHECK-NEXT: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND [[ZEXT]](s32), %bb.1 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: bb.0.entry: @@ -91,9 +90,11 @@ body: | ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND [[COPY1]](s1), %bb.1 + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: bb.0.entry: @@ -120,13 +121,15 @@ body: | ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND [[COPY1]](s1), %bb.1 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: bb.0.entry: @@ -157,9 +160,11 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND [[COPY1]](s1), %bb.1 + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: bb.0.entry: @@ -189,10 +194,12 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]] ; CHECK-NEXT: S_NOP 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK-NEXT: G_BRCOND [[COPY1]](s1), %bb.1 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: bb.0.entry: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.mir index 0dc1165843e8f..ef2477cde4ddc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" -o - %s | FileCheck %s --- name: build_vector_v2s32_ss @@ -88,9 +87,9 @@ body: | ; CHECK-LABEL: name: build_vector_v2s32_aa ; CHECK: liveins: $agpr0, $agpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr1 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:agpr(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $agpr1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = COPY $agpr0 %1:_(s32) = COPY $agpr1 @@ -111,9 +110,8 @@ body: | ; CHECK: liveins: $vgpr0, $agpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $agpr0 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $agpr0 @@ -133,10 +131,9 @@ body: | ; CHECK-LABEL: name: build_vector_v2s32_av ; CHECK: liveins: $vgpr0, $agpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $agpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = COPY $agpr0 %1:_(s32) = COPY $vgpr0 @@ -157,10 +154,9 @@ body: | ; CHECK: liveins: $sgpr0, $agpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $agpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $agpr0 @@ -180,11 +176,10 @@ body: | ; CHECK-LABEL: name: build_vector_v2s32_as ; CHECK: liveins: $sgpr0, $agpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $agpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY2]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = COPY $agpr0 %1:_(s32) = COPY $sgpr0 @@ -204,10 +199,10 @@ body: | ; CHECK-LABEL: name: build_vector_v3s32_aaa ; CHECK: liveins: $agpr0, $agpr1, $agpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(s32) = COPY $agpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:agpr(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $agpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $agpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(s32) = COPY $agpr0 %1:_(s32) = COPY $agpr1 @@ -228,11 +223,11 @@ body: | ; CHECK-LABEL: name: build_vector_v4s32_aaaa ; CHECK: liveins: $agpr0, $agpr1, $agpr2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(s32) = COPY $agpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr(s32) = COPY $agpr2 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:agpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $agpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $agpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $agpr2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<4 x s32>) %0:_(s32) = COPY $agpr0 %1:_(s32) = COPY $agpr1 @@ -254,15 +249,15 @@ body: | ; CHECK-LABEL: name: build_vector_v8s32_aaaaaaaa ; CHECK: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(s32) = COPY $agpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr(s32) = COPY $agpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:agpr(s32) = COPY $agpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(s32) = COPY $agpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:agpr(s32) = COPY $agpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:agpr(s32) = COPY $agpr7 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:agpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $agpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $agpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $agpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $agpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $agpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $agpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $agpr7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<8 x s32>) %0:_(s32) = COPY $agpr0 %1:_(s32) = COPY $agpr1 @@ -288,23 +283,23 @@ body: | ; CHECK-LABEL: name: build_vector_v16s32_aaaaaaaaaaaaaaaa ; CHECK: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(s32) = COPY $agpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr(s32) = COPY $agpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:agpr(s32) = COPY $agpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(s32) = COPY $agpr5 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:agpr(s32) = COPY $agpr6 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:agpr(s32) = COPY $agpr7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:agpr(s32) = COPY $agpr8 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:agpr(s32) = COPY $agpr9 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:agpr(s32) = COPY $agpr10 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:agpr(s32) = COPY $agpr11 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:agpr(s32) = COPY $agpr12 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:agpr(s32) = COPY $agpr13 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:agpr(s32) = COPY $agpr14 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:agpr(s32) = COPY $agpr15 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:agpr(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $agpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $agpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $agpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $agpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $agpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $agpr5 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $agpr6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $agpr7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $agpr8 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $agpr9 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $agpr10 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $agpr11 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $agpr12 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY $agpr13 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY $agpr14 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY $agpr15 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) %0:_(s32) = COPY $agpr0 %1:_(s32) = COPY $agpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-default.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-default.mir index bd699956500ca..456e0c135a874 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-default.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-default.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -march amdgcn -mcpu=fiji -run-pass=regbankselect %s -o - | FileCheck %s +# RUN: llc -O0 -march amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s # Check the default mappings for various instructions. diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fadd.mir index 4fba30325f98b..8944fd78ca9ee 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fadd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fadd.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s --- name: fadd_ss diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir index 2b60dcd9dcabe..62bf14b091340 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s --- name: test_frame_index_p5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptr-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptr-add.mir index 55048d51918fa..a70708134a126 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptr-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ptr-add.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s --- name: gep_p1_s_k diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir index 2177cd7493c45..997ac804f710d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -run-pass=regbankselect %s -o - | FileCheck -check-prefix=GFX7 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -run-pass=regbankselect %s -o - | FileCheck -check-prefix=GFX12 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck -check-prefix=GFX7 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck -check-prefix=GFX12 %s --- | diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir index 4fcd0fdf105d9..b0199d3ad5cd1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s --- name: sub_s32_ss diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir index 554c88a68972b..e95be13c47d3b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s --- name: uitofp_s diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll index ad60a617dcd1c..9f4a6f2f63f15 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shlN_add.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX10 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX10 %s ; Test gfx9+ s_shl[1-4]_add_u32 pattern matching diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll index c19992325ea67..b6c8f21143ba1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=bonaire < %s | FileCheck -check-prefix=GFX7 %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tonga < %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=bonaire < %s | FileCheck -check-prefix=GFX7 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=tonga < %s | FileCheck -check-prefix=GFX8 %s define i16 @v_trunc_i32_to_i16(i32 %src) { ; GFX7-LABEL: v_trunc_i32_to_i16: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/v_bfe_i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/v_bfe_i32.ll index 5408ad0747b74..abfb4fea69923 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/v_bfe_i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/v_bfe_i32.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa --global-isel < %s | FileCheck --check-prefix=PREGFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --global-isel -mcpu=hawaii < %s | FileCheck --check-prefix=PREGFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --global-isel -mcpu=fiji < %s | FileCheck --check-prefix=PREGFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --global-isel -mcpu=gfx90a < %s | FileCheck --check-prefix=PREGFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --global-isel -mcpu=gfx1030 < %s | FileCheck --check-prefix=GFX10PLUS %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa --global-isel -mcpu=gfx1100 < %s | FileCheck --check-prefix=GFX10PLUS %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --global-isel -new-reg-bank-select < %s | FileCheck --check-prefix=PREGFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --global-isel -new-reg-bank-select -mcpu=hawaii < %s | FileCheck --check-prefix=PREGFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --global-isel -new-reg-bank-select -mcpu=fiji < %s | FileCheck --check-prefix=PREGFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --global-isel -new-reg-bank-select -mcpu=gfx90a < %s | FileCheck --check-prefix=PREGFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --global-isel -new-reg-bank-select -mcpu=gfx1030 < %s | FileCheck --check-prefix=GFX10PLUS %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --global-isel -new-reg-bank-select -mcpu=gfx1100 < %s | FileCheck --check-prefix=GFX10PLUS %s define i32 @check_v_bfe(i16 %a) { ; PREGFX9-LABEL: check_v_bfe: diff --git a/llvm/test/CodeGen/AMDGPU/allow-check.ll b/llvm/test/CodeGen/AMDGPU/allow-check.ll index d4f5621ce26a4..162a8bcfc9ff4 100644 --- a/llvm/test/CodeGen/AMDGPU/allow-check.ll +++ b/llvm/test/CodeGen/AMDGPU/allow-check.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -global-isel=0 -fast-isel=0 | FileCheck %s -; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -global-isel=1 -fast-isel=0 | FileCheck %s +; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -global-isel=1 -new-reg-bank-select -fast-isel=0 | FileCheck %s ; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -global-isel=0 -fast-isel=1 | FileCheck %s define i1 @test_runtime() local_unnamed_addr { diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll index 77c9b9813571a..3e80a58bda4a0 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GISEL-GFX12 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GISEL-GFX12 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=DAGISEL-GFX12 %s declare amdgpu_cs_chain void @callee(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 }) diff --git a/llvm/test/CodeGen/AMDGPU/bitop3.ll b/llvm/test/CodeGen/AMDGPU/bitop3.ll index ba818f6ecc069..187f19f653858 100644 --- a/llvm/test/CodeGen/AMDGPU/bitop3.ll +++ b/llvm/test/CodeGen/AMDGPU/bitop3.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel=0 -mtriple=amdgcn-- -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-SDAG %s -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-GISEL %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-- -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-GISEL %s ; RUN: llc -global-isel=0 -mtriple=amdgcn-- -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1250,GFX1250-SDAG,GFX1250-SDAG-FAKE16,GFX1250-FAKE16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn-- -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1250,GFX1250-SDAG,GFX1250-SDAG-TRUE16,GFX1250-TRUE16 %s -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1250,GFX1250-GISEL,GFX1250-GISEL-FAKE16,GFX1250-FAKE16 %s -; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1250,GFX1250-GISEL,GFX1250-GISEL-TRUE16,GFX1250-TRUE16 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-- -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1250,GFX1250-GISEL,GFX1250-GISEL-FAKE16,GFX1250-FAKE16 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-- -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1250,GFX1250-GISEL,GFX1250-GISEL-TRUE16,GFX1250-TRUE16 %s ; ========= Single bit functions ========= diff --git a/llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll index 5dff7372ab561..294c9045ebc14 100644 --- a/llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll +++ b/llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsad < %s | FileCheck %s -; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa < %s | FileCheck %s ; Check illegal casts are codegened as poison, and not an error. diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll index f668a116f3c83..cc38d0732399b 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: not --crash llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -global-isel=0 < %s 2>&1 | FileCheck -check-prefix=GFX9-SDAG-ERR %s -; RUN: not llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -global-isel=1 < %s 2>&1 | FileCheck -check-prefix=GFX9-GISEL-ERR %s +; RUN: not llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -global-isel=1 -new-reg-bank-select < %s 2>&1 | FileCheck -check-prefix=GFX9-GISEL-ERR %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 < %s | FileCheck -check-prefix=GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 < %s | FileCheck -check-prefix=GFX12 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefix=GFX12 %s ; GFX9-SDAG-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.wave.id ; GFX9-GISEL-ERR: LLVM ERROR: unable to legalize instruction: {{.*}} = G_INTRINSIC intrinsic(@llvm.amdgcn.wave.id) diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll b/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll index 8629d548ec0c1..6cc396085280c 100644 --- a/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll +++ b/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -stop-after twoaddressinstruction < %s | FileCheck %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx900 -stop-after twoaddressinstruction < %s | FileCheck %s ; Check that %16 gets constrained to register class sgpr_96_with_sub0_sub1. define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(ptr addrspace(4) inreg %ptr) { diff --git a/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll b/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll index 77dc32d5fb829..2d7a91f0cd114 100644 --- a/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll +++ b/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s define amdgpu_ps void @intrinsic_store_system_scope(i32 %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { ; GFX12-LABEL: intrinsic_store_system_scope: