diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 40e176c2ab5ce..a136e8718435b 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1652,6 +1652,10 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::amdgcn_perm: case Intrinsic::amdgcn_wave_reduce_umin: case Intrinsic::amdgcn_wave_reduce_umax: + case Intrinsic::amdgcn_wave_reduce_max: + case Intrinsic::amdgcn_wave_reduce_min: + case Intrinsic::amdgcn_wave_reduce_and: + case Intrinsic::amdgcn_wave_reduce_or: case Intrinsic::amdgcn_s_wqm: case Intrinsic::amdgcn_s_quadmask: case Intrinsic::amdgcn_s_bitreplicate: @@ -3672,6 +3676,11 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty, return ConstantInt::get(Ty, C0->abs()); case Intrinsic::amdgcn_wave_reduce_umin: case Intrinsic::amdgcn_wave_reduce_umax: + case Intrinsic::amdgcn_wave_reduce_max: + case Intrinsic::amdgcn_wave_reduce_min: + case Intrinsic::amdgcn_wave_reduce_and: + case Intrinsic::amdgcn_wave_reduce_or: + // Idempotent reductions only; add/sub/xor depend on the active-lane count. return dyn_cast<Constant>(Operands[0]); } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll index 5f303e30c923b..a7ebf458d2591 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll @@ -200,1781 +200,1011 @@ entry: ret void } -define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value: +define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { +; GFX8DAGISEL-LABEL: divergent_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 
s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX8DAGISEL-NEXT: s_add_i32 s4, s4, s6 +; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8DAGISEL-NEXT: s_endpgm ; -; GFX8GISEL-LABEL: const_value: +; GFX8GISEL-LABEL: divergent_value: ; GFX8GISEL: ; %bb.0: ; %entry ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8GISEL-NEXT: s_mov_b32 s4, 0 +; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX8GISEL-NEXT: s_add_i32 s4, s4, s6 +; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8GISEL-NEXT: s_endpgm ; -; GFX9DAGISEL-LABEL: const_value: +; GFX9DAGISEL-LABEL: divergent_value: ; GFX9DAGISEL: ; %bb.0: ; %entry ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; 
GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX9DAGISEL-NEXT: s_add_i32 s4, s4, s6 +; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX9DAGISEL-NEXT: ; %bb.2: +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; -; GFX9GISEL-LABEL: const_value: +; GFX9GISEL-LABEL: divergent_value: ; GFX9GISEL: ; %bb.0: ; %entry ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX9GISEL-NEXT: s_add_i32 s4, s4, s6 +; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX9GISEL-NEXT: ; %bb.2: +; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: const_value: +; GFX1064DAGISEL-LABEL: divergent_value: ; GFX1064DAGISEL: ; %bb.0: ; %entry ; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 
s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1064DAGISEL-NEXT: s_add_i32 s4, s4, s6 +; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1064DAGISEL-NEXT: ; %bb.2: +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1064DAGISEL-NEXT: s_endpgm ; -; GFX1064GISEL-LABEL: const_value: +; GFX1064GISEL-LABEL: divergent_value: ; GFX1064GISEL: ; %bb.0: ; %entry ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 +; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1064GISEL-NEXT: s_add_i32 s4, s4, s6 +; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1064GISEL-NEXT: ; %bb.2: +; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1064GISEL-NEXT: s_endpgm ; -; GFX1032DAGISEL-LABEL: const_value: +; GFX1032DAGISEL-LABEL: divergent_value: ; GFX1032DAGISEL: ; %bb.0: ; %entry ; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], 
s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0 +; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 +; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1032DAGISEL-NEXT: s_add_i32 s2, s2, s5 +; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1032DAGISEL-NEXT: ; %bb.2: +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1032DAGISEL-NEXT: s_endpgm ; -; GFX1032GISEL-LABEL: const_value: +; GFX1032GISEL-LABEL: divergent_value: ; GFX1032GISEL: ; %bb.0: ; %entry ; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_mulk_i32 s2, 0x7b +; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1032GISEL-NEXT: s_mov_b32 s2, 0 +; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 +; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1032GISEL-NEXT: s_add_i32 s2, s2, s5 +; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1032GISEL-NEXT: s_endpgm ; -; 
GFX1164DAGISEL-LABEL: const_value: +; GFX1164DAGISEL-LABEL: divergent_value: ; GFX1164DAGISEL: ; %bb.0: ; %entry ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1164DAGISEL-NEXT: s_add_i32 s4, s4, s6 +; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1164DAGISEL-NEXT: ; %bb.2: +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1164DAGISEL-NEXT: s_endpgm ; -; GFX1164GISEL-LABEL: const_value: +; GFX1164GISEL-LABEL: divergent_value: ; GFX1164GISEL: ; %bb.0: ; %entry ; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164GISEL-NEXT: s_mov_b32 s4, 0 +; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] +; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1164GISEL-NEXT: s_add_i32 s4, s4, s6 +; GFX1164GISEL-NEXT: s_cmp_lg_u64 
s[2:3], 0 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1164GISEL-NEXT: ; %bb.2: +; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1164GISEL-NEXT: s_endpgm ; -; GFX1132DAGISEL-LABEL: const_value: +; GFX1132DAGISEL-LABEL: divergent_value: ; GFX1132DAGISEL: ; %bb.0: ; %entry ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0 +; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 +; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1132DAGISEL-NEXT: s_add_i32 s2, s2, s5 +; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1132DAGISEL-NEXT: ; %bb.2: +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1132DAGISEL-NEXT: s_endpgm ; -; GFX1132GISEL-LABEL: const_value: +; GFX1132GISEL-LABEL: divergent_value: ; 
GFX1132GISEL: ; %bb.0: ; %entry ; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1132GISEL-NEXT: s_mov_b32 s2, 0 +; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 +; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1132GISEL-NEXT: s_add_i32 s2, s2, s5 +; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1132GISEL-NEXT: ; %bb.2: +; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1132GISEL-NEXT: s_endpgm ; -; GFX12DAGISEL-LABEL: const_value: +; GFX12DAGISEL-LABEL: divergent_value: ; GFX12DAGISEL: ; %bb.0: ; %entry ; GFX12DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX12DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX12DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX12DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX12DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX12DAGISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX12DAGISEL-NEXT: s_mov_b32 s2, 0 +; GFX12DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX12DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 +; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe +; 
GFX12DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX12DAGISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX12DAGISEL-NEXT: s_add_co_i32 s2, s2, s5 +; GFX12DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX12DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX12DAGISEL-NEXT: ; %bb.2: +; GFX12DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX12DAGISEL-NEXT: s_endpgm entry: - %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 123, i32 1) + %id.x = call i32 @llvm.amdgcn.workitem.id.x() + %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %id.x, i32 1) store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: poison_value: +define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { +; GFX8DAGISEL-LABEL: divergent_cfg: ; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 +; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX8DAGISEL-NEXT: ; %bb.1: ; %else +; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 +; GFX8DAGISEL-NEXT: s_mul_i32 s2, s6, s2 +; GFX8DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; 
GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX8DAGISEL-NEXT: ; %bb.3: ; %if +; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX8DAGISEL-NEXT: s_add_i32 s6, s6, s8 +; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8DAGISEL-NEXT: ; %bb.5: +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX8DAGISEL-NEXT: flat_store_dword v[2:3], v1 ; GFX8DAGISEL-NEXT: s_endpgm ; -; GFX8GISEL-LABEL: poison_value: +; GFX8GISEL-LABEL: divergent_cfg: ; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 +; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX8GISEL-NEXT: ; %bb.1: ; %else +; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 +; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8GISEL-NEXT: s_mul_i32 s6, s6, s2 +; GFX8GISEL-NEXT: .LBB2_2: ; %Flow +; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX8GISEL-NEXT: ; %bb.3: ; %if +; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX8GISEL-NEXT: s_mov_b32 s6, 0 +; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; 
GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX8GISEL-NEXT: s_add_i32 s6, s6, s8 +; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8GISEL-NEXT: .LBB2_5: ; %endif +; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s2, s0, s2 ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2 ; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8GISEL-NEXT: s_endpgm ; -; GFX9DAGISEL-LABEL: poison_value: +; GFX9DAGISEL-LABEL: divergent_cfg: ; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 +; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX9DAGISEL-NEXT: ; %bb.1: ; %else +; GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c +; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s0, s2 +; GFX9DAGISEL-NEXT: s_mul_i32 s2, s6, s2 +; GFX9DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX9DAGISEL-NEXT: ; %bb.3: ; %if +; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX9DAGISEL-NEXT: v_readlane_b32 
s8, v0, s7 +; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX9DAGISEL-NEXT: s_add_i32 s6, s6, s8 +; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9DAGISEL-NEXT: ; %bb.5: +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; -; GFX9GISEL-LABEL: poison_value: +; GFX9GISEL-LABEL: divergent_cfg: ; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 +; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX9GISEL-NEXT: ; %bb.1: ; %else +; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 +; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9GISEL-NEXT: s_mul_i32 s6, s6, s2 +; GFX9GISEL-NEXT: .LBB2_2: ; %Flow +; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX9GISEL-NEXT: ; %bb.3: ; %if +; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX9GISEL-NEXT: s_mov_b32 s6, 0 +; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX9GISEL-NEXT: s_add_i32 s6, s6, s8 +; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9GISEL-NEXT: .LBB2_5: ; %endif +; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; 
GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: poison_value: +; GFX1064DAGISEL-LABEL: divergent_cfg: ; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 +; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s0, s2 +; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s6, s2 +; GFX1064DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1064DAGISEL-NEXT: s_add_i32 s6, s6, s8 +; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064DAGISEL-NEXT: ; %bb.5: +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; 
GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX1064DAGISEL-NEXT: s_endpgm ; -; GFX1064GISEL-LABEL: poison_value: +; GFX1064GISEL-LABEL: divergent_cfg: ; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 +; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1064GISEL-NEXT: ; %bb.1: ; %else +; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s2 +; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1064GISEL-NEXT: ; %bb.3: ; %if +; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064GISEL-NEXT: s_mov_b32 s6, 0 +; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1064GISEL-NEXT: s_add_i32 s6, s6, s8 +; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064GISEL-NEXT: .LBB2_5: ; %endif +; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 +; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: 
global_store_dword v1, v0, s[0:1] ; GFX1064GISEL-NEXT: s_endpgm ; -; GFX1032DAGISEL-LABEL: poison_value: +; GFX1032DAGISEL-LABEL: divergent_cfg: ; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0 +; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1 +; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo +; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1032DAGISEL-NEXT: s_mul_i32 s1, s1, s2 +; GFX1032DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0 +; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 +; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1032DAGISEL-NEXT: s_add_i32 s1, s1, s6 +; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032DAGISEL-NEXT: ; %bb.5: +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX1032DAGISEL-NEXT: s_endpgm ; -; GFX1032GISEL-LABEL: poison_value: +; GFX1032GISEL-LABEL: divergent_cfg: ; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1032GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v0 +; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 +; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo +; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1032GISEL-NEXT: ; %bb.1: ; %else +; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX1032GISEL-NEXT: s_mul_i32 s0, s0, s2 +; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1032GISEL-NEXT: ; %bb.3: ; %if +; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032GISEL-NEXT: s_mov_b32 s0, 0 +; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 +; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1032GISEL-NEXT: s_add_i32 s0, s0, s6 +; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032GISEL-NEXT: .LBB2_5: ; %endif +; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3] ; GFX1032GISEL-NEXT: s_endpgm ; -; GFX1164DAGISEL-LABEL: poison_value: +; 
GFX1164DAGISEL-LABEL: divergent_cfg: ; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec +; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 +; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0 +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s6, s2 +; GFX1164DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1164DAGISEL-NEXT: s_add_i32 s6, s6, s8 +; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164DAGISEL-NEXT: ; %bb.5: +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1164DAGISEL-NEXT: 
s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX1164DAGISEL-NEXT: s_endpgm ; -; GFX1164GISEL-LABEL: poison_value: +; GFX1164GISEL-LABEL: divergent_cfg: ; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec +; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 +; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1164GISEL-NEXT: ; %bb.1: ; %else +; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 +; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_i32 s2, s0, s2 +; GFX1164GISEL-NEXT: s_mul_i32 s6, s6, s2 +; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1164GISEL-NEXT: ; %bb.3: ; %if +; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164GISEL-NEXT: s_mov_b32 s6, 0 +; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1164GISEL-NEXT: s_add_i32 s6, s6, s8 +; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164GISEL-NEXT: .LBB2_5: ; %endif +; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1164GISEL-NEXT: s_load_b64 
s[0:1], s[4:5], 0x24 +; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 +; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1164GISEL-NEXT: s_endpgm ; -; GFX1132DAGISEL-LABEL: poison_value: +; GFX1132DAGISEL-LABEL: divergent_cfg: ; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo +; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1 +; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 +; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0 +; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: poison_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -; -; GFX12DAGISEL-LABEL: poison_value: -; GFX12DAGISEL: ; %bb.0: ; %entry -; 
GFX12DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX12DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX12DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX12DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX12DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12DAGISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 poison, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: divergent_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX8DAGISEL-NEXT: s_add_i32 s4, s4, s6 -; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX8DAGISEL-NEXT: ; %bb.2: -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: divergent_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_mov_b32 s4, 0 -; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX8GISEL-NEXT: s_add_i32 s4, s4, s6 -; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; 
GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX8GISEL-NEXT: ; %bb.2: -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: divergent_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX9DAGISEL-NEXT: s_add_i32 s4, s4, s6 -; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX9DAGISEL-NEXT: ; %bb.2: -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: divergent_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX9GISEL-NEXT: s_add_i32 s4, s4, s6 -; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX9GISEL-NEXT: ; %bb.2: -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: divergent_value: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; 
GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1064DAGISEL-NEXT: s_add_i32 s4, s4, s6 -; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1064DAGISEL-NEXT: ; %bb.2: -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: divergent_value: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1064GISEL-NEXT: s_add_i32 s4, s4, s6 -; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1064GISEL-NEXT: ; %bb.2: -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: divergent_value: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 -; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 -; 
GFX1032DAGISEL-NEXT: s_add_i32 s2, s2, s5 -; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1032DAGISEL-NEXT: ; %bb.2: -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: divergent_value: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1032GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 -; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1032GISEL-NEXT: s_add_i32 s2, s2, s5 -; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1032GISEL-NEXT: ; %bb.2: -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: divergent_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1164DAGISEL-NEXT: s_add_i32 s4, s4, s6 -; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1164DAGISEL-NEXT: ; %bb.2: -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1164DAGISEL-NEXT: 
s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: divergent_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1164GISEL-NEXT: s_add_i32 s4, s4, s6 -; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1164GISEL-NEXT: ; %bb.2: -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: divergent_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1132DAGISEL-NEXT: s_add_i32 s2, s2, s5 -; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1132DAGISEL-NEXT: ; %bb.2: -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; 
GFX1132GISEL-LABEL: divergent_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1132GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 -; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1132GISEL-NEXT: s_add_i32 s2, s2, s5 -; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1132GISEL-NEXT: ; %bb.2: -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -; -; GFX12DAGISEL-LABEL: divergent_value: -; GFX12DAGISEL: ; %bb.0: ; %entry -; GFX12DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX12DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX12DAGISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX12DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX12DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX12DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 -; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe -; GFX12DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX12DAGISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX12DAGISEL-NEXT: s_add_co_i32 s2, s2, s5 -; GFX12DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX12DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX12DAGISEL-NEXT: ; %bb.2: -; GFX12DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX12DAGISEL-NEXT: s_endpgm -entry: - %id.x = call i32 @llvm.amdgcn.workitem.id.x() - %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %id.x, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) 
%out, i32 %in) { -; GFX8DAGISEL-LABEL: divergent_cfg: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 -; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX8DAGISEL-NEXT: ; %bb.1: ; %else -; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s6, s2 -; GFX8DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX8DAGISEL-NEXT: ; %bb.3: ; %if -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX8DAGISEL-NEXT: s_add_i32 s6, s6, s8 -; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8DAGISEL-NEXT: ; %bb.5: -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dword v[2:3], v1 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: divergent_cfg: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 -; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, 
s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX8GISEL-NEXT: ; %bb.1: ; %else -; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s6, s6, s2 -; GFX8GISEL-NEXT: .LBB4_2: ; %Flow -; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX8GISEL-NEXT: ; %bb.3: ; %if -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_mov_b32 s6, 0 -; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX8GISEL-NEXT: s_add_i32 s6, s6, s8 -; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8GISEL-NEXT: .LBB4_5: ; %endif -; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: divergent_cfg: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 -; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX9DAGISEL-NEXT: ; %bb.1: ; %else -; GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s6, s2 -; GFX9DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], 
s[0:1] -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX9DAGISEL-NEXT: ; %bb.3: ; %if -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX9DAGISEL-NEXT: s_add_i32 s6, s6, s8 -; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9DAGISEL-NEXT: ; %bb.5: -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: divergent_cfg: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 -; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX9GISEL-NEXT: ; %bb.1: ; %else -; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s6, s6, s2 -; GFX9GISEL-NEXT: .LBB4_2: ; %Flow -; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX9GISEL-NEXT: ; %bb.3: ; %if -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX9GISEL-NEXT: 
s_bitset0_b64 s[2:3], s7 -; GFX9GISEL-NEXT: s_add_i32 s6, s6, s8 -; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9GISEL-NEXT: .LBB4_5: ; %endif -; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: divergent_cfg: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 -; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s6, s2 -; GFX1064DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1064DAGISEL-NEXT: s_add_i32 s6, s6, s8 -; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064DAGISEL-NEXT: ; %bb.5: -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1064DAGISEL-NEXT: .LBB4_6: ; 
%endif -; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: divergent_cfg: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 -; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1064GISEL-NEXT: ; %bb.1: ; %else -; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s2 -; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1064GISEL-NEXT: ; %bb.3: ; %if -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1064GISEL-NEXT: s_add_i32 s6, s6, s8 -; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064GISEL-NEXT: .LBB4_5: ; %endif -; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: divergent_cfg: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; 
GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0 -; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1 -; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo -; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_i32 s1, s1, s2 -; GFX1032DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 -; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1032DAGISEL-NEXT: s_add_i32 s1, s1, s6 -; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032DAGISEL-NEXT: ; %bb.5: -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: divergent_cfg: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v0 -; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 -; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo -; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1032GISEL-NEXT: 
s_cbranch_execz .LBB4_2 -; GFX1032GISEL-NEXT: ; %bb.1: ; %else -; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_i32 s0, s0, s2 -; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1032GISEL-NEXT: ; %bb.3: ; %if -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 -; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1032GISEL-NEXT: s_add_i32 s0, s0, s6 -; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032GISEL-NEXT: .LBB4_5: ; %endif -; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: divergent_cfg: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec -; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 -; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: 
s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s6, s2 -; GFX1164DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1164DAGISEL-NEXT: s_add_i32 s6, s6, s8 -; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164DAGISEL-NEXT: ; %bb.5: -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: divergent_cfg: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec -; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 -; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1164GISEL-NEXT: ; %bb.1: ; %else -; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, 
s[2:3] -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_i32 s6, s6, s2 -; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1164GISEL-NEXT: ; %bb.3: ; %if -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1164GISEL-NEXT: s_add_i32 s6, s6, s8 -; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164GISEL-NEXT: .LBB4_5: ; %endif -; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: divergent_cfg: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo -; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 -; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_i32 s1, s1, s2 -; GFX1132DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1132DAGISEL-NEXT: 
s_or_saveexec_b32 s0, s0 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1132DAGISEL-NEXT: s_add_i32 s1, s1, s6 -; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132DAGISEL-NEXT: ; %bb.5: -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: divergent_cfg: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo -; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0 -; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 -; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1132GISEL-NEXT: ; %bb.1: ; %else -; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_i32 s0, s0, s2 -; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 -; GFX1132GISEL-NEXT: 
s_cbranch_execz .LBB4_5 -; GFX1132GISEL-NEXT: ; %bb.3: ; %if -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1132GISEL-NEXT: s_add_i32 s0, s0, s6 -; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132GISEL-NEXT: .LBB4_5: ; %endif -; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3] -; GFX1132GISEL-NEXT: s_endpgm -; -; GFX12DAGISEL-LABEL: divergent_cfg: -; GFX12DAGISEL: ; %bb.0: ; %entry -; GFX12DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX12DAGISEL-NEXT: s_mov_b32 s0, exec_lo -; GFX12DAGISEL-NEXT: ; implicit-def: $sgpr1 -; GFX12DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 -; GFX12DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX12DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX12DAGISEL-NEXT: ; %bb.1: ; %else -; GFX12DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c -; GFX12DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX12DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12DAGISEL-NEXT: s_mul_i32 s1, s1, s2 -; GFX12DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX12DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 -; GFX12DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX12DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX12DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX12DAGISEL-NEXT: ; %bb.3: ; %if -; GFX12DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX12DAGISEL-NEXT: s_mov_b32 s1, 0 -; 
GFX12DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX12DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 -; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe -; GFX12DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX12DAGISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX12DAGISEL-NEXT: s_add_co_i32 s1, s1, s6 -; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe -; GFX12DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX12DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX12DAGISEL-NEXT: ; %bb.5: -; GFX12DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX12DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX12DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX12DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX12DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12DAGISEL-NEXT: s_endpgm -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %d_cmp = icmp ult i32 %tid, 16 - br i1 %d_cmp, label %if, label %else - -if: - %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %tid, i32 1) - br label %endif - -else: - %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %in, i32 1) - br label %endif - -endif: - %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else] - store i32 %combine, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: uniform_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: s_mul_i32 s0, s2, s4 -; GFX8DAGISEL-NEXT: s_mul_hi_u32 s1, s2, s4 -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, s4 -; GFX8DAGISEL-NEXT: s_add_u32 s1, s1, s2 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], 
v[2:3] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: uniform_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5] -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s4, s2, s5 -; GFX8GISEL-NEXT: s_mul_hi_u32 s2, s2, s5 -; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s5 -; GFX8GISEL-NEXT: s_add_u32 s5, s2, s3 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: uniform_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5] -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s4, s2, s5 -; GFX9DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s5 -; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s5 -; GFX9DAGISEL-NEXT: s_add_u32 s5, s2, s3 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: uniform_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5] -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s4, s2, s5 -; GFX9GISEL-NEXT: s_mul_hi_u32 s2, s2, s5 -; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s5 -; GFX9GISEL-NEXT: s_add_u32 s5, s2, s3 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; 
GFX1064DAGISEL-LABEL: uniform_value_i64: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4 -; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1064DAGISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: uniform_value_i64: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_hi_u32 s5, s2, s4 -; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1064GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1064GISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: uniform_value_i64: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4 -; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1032DAGISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032DAGISEL-NEXT: 
global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: uniform_value_i64: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_hi_u32 s5, s2, s4 -; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1032GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1032GISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: uniform_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4 -; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1164DAGISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: uniform_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_hi_u32 s5, s2, s4 -; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1164GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1164GISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164GISEL-NEXT: 
v_mov_b32_e32 v1, s3 -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: uniform_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4 -; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1132DAGISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 -; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: uniform_value_i64: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_hi_u32 s5, s2, s4 -; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1132GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1132GISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -; -; GFX12DAGISEL-LABEL: uniform_value_i64: -; GFX12DAGISEL: ; %bb.0: ; %entry -; GFX12DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX12DAGISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4 -; GFX12DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX12DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; 
GFX12DAGISEL-NEXT: s_add_co_u32 s3, s5, s3 -; GFX12DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX12DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 -; GFX12DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX12DAGISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 %in, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3] -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX8DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3 -; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX8DAGISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3] -; GFX8GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX8GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3 -; GFX8GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX8GISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: 
s_bcnt1_i32_b64 s3, s[2:3] -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX9DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3 -; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX9DAGISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3] -; GFX9GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX9GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3 -; GFX9GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX9GISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: const_value_i64: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2 -; GFX1064DAGISEL-NEXT: s_mul_i32 s4, s2, 0 -; GFX1064DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1064DAGISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: const_value_i64: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: 
v_mov_b32_e32 v2, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2 -; GFX1064GISEL-NEXT: s_mul_i32 s4, s2, 0 -; GFX1064GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1064GISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: const_value_i64: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2 -; GFX1032DAGISEL-NEXT: s_mul_i32 s4, s2, 0 -; GFX1032DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1032DAGISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: const_value_i64: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2 -; GFX1032GISEL-NEXT: s_mul_i32 s4, s2, 0 -; GFX1032GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1032GISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 
s[2:3], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2 -; GFX1164DAGISEL-NEXT: s_mul_i32 s4, s2, 0 -; GFX1164DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1164DAGISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2 -; GFX1164GISEL-NEXT: s_mul_i32 s4, s2, 0 -; GFX1164GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1164GISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1132DAGISEL-NEXT: s_mul_i32 s1, s1, s2 +; GFX1132DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2 -; 
GFX1132DAGISEL-NEXT: s_mul_i32 s4, s2, 0 -; GFX1132DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1132DAGISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 +; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0 +; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 +; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1132DAGISEL-NEXT: s_add_i32 s1, s1, s6 +; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132DAGISEL-NEXT: ; %bb.5: +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX1132DAGISEL-NEXT: s_endpgm ; -; GFX1132GISEL-LABEL: const_value_i64: +; GFX1132GISEL-LABEL: divergent_cfg: ; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo +; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0 +; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 +; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1132GISEL-NEXT: ; %bb.1: ; %else +; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 +; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: 
s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2 -; GFX1132GISEL-NEXT: s_mul_i32 s4, s2, 0 -; GFX1132GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1132GISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1132GISEL-NEXT: s_mul_i32 s0, s0, s2 +; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1132GISEL-NEXT: ; %bb.3: ; %if +; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 +; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 +; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1132GISEL-NEXT: s_add_i32 s0, s0, s6 +; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132GISEL-NEXT: .LBB2_5: ; %endif +; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 +; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 +; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3] ; GFX1132GISEL-NEXT: s_endpgm ; -; GFX12DAGISEL-LABEL: const_value_i64: +; GFX12DAGISEL-LABEL: divergent_cfg: ; GFX12DAGISEL: ; %bb.0: ; %entry -; GFX12DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX12DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12DAGISEL-NEXT: s_mov_b32 s0, exec_lo +; GFX12DAGISEL-NEXT: ; implicit-def: $sgpr1 +; GFX12DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 +; GFX12DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 +; GFX12DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX12DAGISEL-NEXT: ; %bb.1: ; %else +; 
GFX12DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c ; GFX12DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12DAGISEL-NEXT: ; implicit-def: $vgpr0 +; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX12DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX12DAGISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2 -; GFX12DAGISEL-NEXT: s_mul_i32 s4, s2, 0 -; GFX12DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX12DAGISEL-NEXT: s_add_co_u32 s3, s3, s4 -; GFX12DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX12DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 ; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX12DAGISEL-NEXT: s_mul_i32 s1, s1, s2 +; GFX12DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX12DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 +; GFX12DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX12DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX12DAGISEL-NEXT: ; %bb.3: ; %if +; GFX12DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX12DAGISEL-NEXT: s_mov_b32 s1, 0 +; GFX12DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX12DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 +; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe +; GFX12DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX12DAGISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX12DAGISEL-NEXT: s_add_co_i32 s1, s1, s6 +; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe +; GFX12DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX12DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX12DAGISEL-NEXT: ; %bb.5: +; GFX12DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX12DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX12DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX12DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX12DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX12DAGISEL-NEXT: s_endpgm entry: - %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 123, i32 1) - store i64 
%result, ptr addrspace(1) %out + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %d_cmp = icmp ult i32 %tid, 16 + br i1 %d_cmp, label %if, label %else + +if: + %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %tid, i32 1) + br label %endif + +else: + %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %in, i32 1) + br label %endif + +endif: + %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else] + store i32 %combine, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: poison_value_i64: +define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) { +; GFX8DAGISEL-LABEL: uniform_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3] +; GFX8DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX8DAGISEL-NEXT: s_mul_hi_u32 s4, s0, s3 -; GFX8DAGISEL-NEXT: s_mul_i32 s3, s1, s3 -; GFX8DAGISEL-NEXT: s_add_u32 s3, s4, s3 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3 +; GFX8DAGISEL-NEXT: s_mul_i32 s0, s2, s4 +; GFX8DAGISEL-NEXT: s_mul_hi_u32 s1, s2, s4 +; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, s4 +; GFX8DAGISEL-NEXT: s_add_u32 s1, s1, s2 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 ; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; GFX8DAGISEL-NEXT: s_endpgm ; -; GFX8GISEL-LABEL: poison_value_i64: +; GFX8GISEL-LABEL: uniform_value_i64: ; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; 
GFX8GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3] +; GFX8GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5] ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX8GISEL-NEXT: s_mul_hi_u32 s4, s0, s3 -; GFX8GISEL-NEXT: s_mul_i32 s3, s1, s3 -; GFX8GISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX8GISEL-NEXT: s_mul_i32 s4, s2, s5 +; GFX8GISEL-NEXT: s_mul_hi_u32 s2, s2, s5 +; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s5 +; GFX8GISEL-NEXT: s_add_u32 s5, s2, s3 +; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s5 ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 ; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8GISEL-NEXT: s_endpgm ; -; GFX9DAGISEL-LABEL: poison_value_i64: +; GFX9DAGISEL-LABEL: uniform_value_i64: ; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3] +; GFX9DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX9DAGISEL-NEXT: s_mul_hi_u32 s4, s0, s3 -; GFX9DAGISEL-NEXT: s_mul_i32 s3, s1, s3 -; GFX9DAGISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX9DAGISEL-NEXT: s_mul_i32 s4, s2, s5 +; GFX9DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s5 +; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s5 +; GFX9DAGISEL-NEXT: s_add_u32 s5, s2, s3 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s5 ; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; -; GFX9GISEL-LABEL: 
poison_value_i64: +; GFX9GISEL-LABEL: uniform_value_i64: ; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3] +; GFX9GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5] ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX9GISEL-NEXT: s_mul_hi_u32 s4, s0, s3 -; GFX9GISEL-NEXT: s_mul_i32 s3, s1, s3 -; GFX9GISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX9GISEL-NEXT: s_mul_i32 s4, s2, s5 +; GFX9GISEL-NEXT: s_mul_hi_u32 s2, s2, s5 +; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s5 +; GFX9GISEL-NEXT: s_add_u32 s5, s2, s3 +; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 +; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s5 ; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: poison_value_i64: +; GFX1064DAGISEL-LABEL: uniform_value_i64: ; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s3, s0, s2 -; GFX1064DAGISEL-NEXT: s_mul_i32 s4, s1, s2 -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1064DAGISEL-NEXT: s_add_u32 s3, s3, s4 +; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4 +; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1064DAGISEL-NEXT: s_add_u32 s3, s5, s3 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 
v1, s3 ; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1064DAGISEL-NEXT: s_endpgm ; -; GFX1064GISEL-LABEL: poison_value_i64: +; GFX1064GISEL-LABEL: uniform_value_i64: ; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_hi_u32 s3, s0, s2 -; GFX1064GISEL-NEXT: s_mul_i32 s4, s1, s2 -; GFX1064GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1064GISEL-NEXT: s_add_u32 s3, s3, s4 +; GFX1064GISEL-NEXT: s_mul_hi_u32 s5, s2, s4 +; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1064GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1064GISEL-NEXT: s_add_u32 s3, s5, s3 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1064GISEL-NEXT: s_endpgm ; -; GFX1032DAGISEL-LABEL: poison_value_i64: +; GFX1032DAGISEL-LABEL: uniform_value_i64: ; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 +; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s3, s0, s2 -; GFX1032DAGISEL-NEXT: s_mul_i32 s4, s1, s2 -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1032DAGISEL-NEXT: s_add_u32 s3, s3, s4 +; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4 +; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1032DAGISEL-NEXT: 
s_add_u32 s3, s5, s3 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1032DAGISEL-NEXT: s_endpgm ; -; GFX1032GISEL-LABEL: poison_value_i64: +; GFX1032GISEL-LABEL: uniform_value_i64: ; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1032GISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 +; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_hi_u32 s3, s0, s2 -; GFX1032GISEL-NEXT: s_mul_i32 s4, s1, s2 -; GFX1032GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1032GISEL-NEXT: s_add_u32 s3, s3, s4 +; GFX1032GISEL-NEXT: s_mul_hi_u32 s5, s2, s4 +; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1032GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1032GISEL-NEXT: s_add_u32 s3, s5, s3 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1032GISEL-NEXT: s_endpgm ; -; GFX1164DAGISEL-LABEL: poison_value_i64: +; GFX1164DAGISEL-LABEL: uniform_value_i64: ; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s3, s0, s2 -; GFX1164DAGISEL-NEXT: s_mul_i32 s4, s1, s2 -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1164DAGISEL-NEXT: s_add_u32 s3, s3, s4 +; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4 +; 
GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1164DAGISEL-NEXT: s_add_u32 s3, s5, s3 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1164DAGISEL-NEXT: s_endpgm ; -; GFX1164GISEL-LABEL: poison_value_i64: +; GFX1164GISEL-LABEL: uniform_value_i64: ; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1164GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_hi_u32 s3, s0, s2 -; GFX1164GISEL-NEXT: s_mul_i32 s4, s1, s2 -; GFX1164GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1164GISEL-NEXT: s_add_u32 s3, s3, s4 +; GFX1164GISEL-NEXT: s_mul_hi_u32 s5, s2, s4 +; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1164GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1164GISEL-NEXT: s_add_u32 s3, s5, s3 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1164GISEL-NEXT: s_endpgm ; -; GFX1132DAGISEL-LABEL: poison_value_i64: +; GFX1132DAGISEL-LABEL: uniform_value_i64: ; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1132DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 +; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s3, s0, s2 -; GFX1132DAGISEL-NEXT: s_mul_i32 s4, s1, s2 -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; 
GFX1132DAGISEL-NEXT: s_add_u32 s3, s3, s4 +; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4 +; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1132DAGISEL-NEXT: s_add_u32 s3, s5, s3 ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 ; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1132DAGISEL-NEXT: s_endpgm ; -; GFX1132GISEL-LABEL: poison_value_i64: +; GFX1132GISEL-LABEL: uniform_value_i64: ; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1132GISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 +; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_hi_u32 s3, s0, s2 -; GFX1132GISEL-NEXT: s_mul_i32 s4, s1, s2 -; GFX1132GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1132GISEL-NEXT: s_add_u32 s3, s3, s4 +; GFX1132GISEL-NEXT: s_mul_hi_u32 s5, s2, s4 +; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1132GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1132GISEL-NEXT: s_add_u32 s3, s5, s3 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1132GISEL-NEXT: s_endpgm ; -; GFX12DAGISEL-LABEL: poison_value_i64: +; GFX12DAGISEL-LABEL: uniform_value_i64: ; GFX12DAGISEL: ; %bb.0: ; %entry -; GFX12DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX12DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX12DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX12DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 +; GFX12DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 ; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12DAGISEL-NEXT: 
s_mul_hi_u32 s3, s0, s2 -; GFX12DAGISEL-NEXT: s_mul_i32 s4, s1, s2 -; GFX12DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX12DAGISEL-NEXT: s_add_co_u32 s3, s3, s4 +; GFX12DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4 +; GFX12DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX12DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX12DAGISEL-NEXT: s_add_co_u32 s3, s5, s3 ; GFX12DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX12DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 ; GFX12DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX12DAGISEL-NEXT: s_endpgm entry: - %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 poison, i32 1) + %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 %in, i32 1) store i64 %result, ptr addrspace(1) %out ret void } @@ -1985,7 +1215,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX8DAGISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -1993,7 +1223,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s8 ; GFX8DAGISEL-NEXT: s_addc_u32 s5, s5, s10 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2006,7 +1236,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop 
Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX8GISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX8GISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -2014,7 +1244,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s8 ; GFX8GISEL-NEXT: s_addc_u32 s5, s5, s10 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2027,7 +1257,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX9DAGISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -2035,7 +1265,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s8 ; GFX9DAGISEL-NEXT: s_addc_u32 s5, s5, s10 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2048,7 +1278,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX9GISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX9GISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -2056,7 +1286,7 @@ define void 
@divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s8 ; GFX9GISEL-NEXT: s_addc_u32 s5, s5, s10 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2069,7 +1299,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX1064DAGISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -2077,7 +1307,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_add_u32 s4, s4, s9 ; GFX1064DAGISEL-NEXT: s_addc_u32 s5, s5, s10 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2089,7 +1319,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX1064GISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX1064GISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -2097,7 +1327,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_add_u32 s4, s4, s9 ; 
GFX1064GISEL-NEXT: s_addc_u32 s5, s5, s10 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2109,7 +1339,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1032DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s9, v3, s7 @@ -2117,7 +1347,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_add_u32 s4, s4, s8 ; GFX1032DAGISEL-NEXT: s_addc_u32 s5, s5, s9 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2129,7 +1359,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032GISEL-NEXT: v_readlane_b32 s9, v3, s7 @@ -2137,7 +1367,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_add_u32 s4, s4, s8 ; GFX1032GISEL-NEXT: s_addc_u32 s5, s5, s9 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032GISEL-NEXT: 
s_cbranch_scc1 .LBB8_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2149,7 +1379,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s4, s[2:3] ; GFX1164DAGISEL-NEXT: v_readlane_b32 s5, v2, s4 @@ -2158,7 +1388,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_add_u32 s0, s0, s5 ; GFX1164DAGISEL-NEXT: s_addc_u32 s1, s1, s6 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -2170,7 +1400,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s4, s[2:3] ; GFX1164GISEL-NEXT: v_readlane_b32 s5, v2, s4 @@ -2179,7 +1409,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_add_u32 s0, s0, s5 ; GFX1164GISEL-NEXT: s_addc_u32 s1, s1, s6 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: 
s_cbranch_scc1 .LBB8_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -2191,7 +1421,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1132DAGISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -2200,7 +1430,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_add_u32 s0, s0, s4 ; GFX1132DAGISEL-NEXT: s_addc_u32 s1, s1, s5 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -2211,7 +1441,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -2220,7 +1450,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_add_u32 s0, s0, s4 ; GFX1132GISEL-NEXT: s_addc_u32 s1, s1, s5 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 
0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -2235,7 +1465,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12DAGISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX12DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX12DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX12DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe ; GFX12DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe @@ -2245,7 +1475,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe ; GFX12DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 ; GFX12DAGISEL-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5] -; GFX12DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX12DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX12DAGISEL-NEXT: ; %bb.2: ; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe ; GFX12DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 @@ -2266,7 +1496,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8DAGISEL-NEXT: ; %bb.1: ; %else ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s7, s[6:7] @@ -2275,7 +1505,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s7 ; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, s7 ; GFX8DAGISEL-NEXT: s_add_u32 s7, s2, s3 -; GFX8DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX8DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9] ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -2304,7 +1534,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s7, s[6:7] @@ -2313,10 +1543,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: s_mul_hi_u32 s2, s2, s7 ; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s7 ; GFX8GISEL-NEXT: s_add_u32 s7, s2, s3 -; GFX8GISEL-NEXT: .LBB9_2: ; %Flow +; GFX8GISEL-NEXT: .LBB5_2: ; %Flow ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec @@ -2326,7 +1556,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: s_mul_hi_u32 s4, s4, s7 ; GFX8GISEL-NEXT: s_mul_i32 s5, s5, s7 ; GFX8GISEL-NEXT: s_add_u32 s7, s4, s5 -; GFX8GISEL-NEXT: .LBB9_4: ; %endif +; GFX8GISEL-NEXT: .LBB5_4: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -2343,7 +1573,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9DAGISEL-NEXT: ; %bb.1: ; %else ; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; 
GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5] @@ -2352,7 +1582,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s5 ; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s5 ; GFX9DAGISEL-NEXT: s_add_u32 s5, s2, s3 -; GFX9DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX9DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 @@ -2380,7 +1610,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s7, s[6:7] @@ -2389,10 +1619,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: s_mul_hi_u32 s2, s2, s7 ; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s7 ; GFX9GISEL-NEXT: s_add_u32 s7, s2, s3 -; GFX9GISEL-NEXT: .LBB9_2: ; %Flow +; GFX9GISEL-NEXT: .LBB5_2: ; %Flow ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34 ; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec @@ -2402,7 +1632,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: s_mul_hi_u32 s5, s8, s4 ; GFX9GISEL-NEXT: s_mul_i32 s4, s9, s4 ; GFX9GISEL-NEXT: s_add_u32 s7, s5, s4 -; GFX9GISEL-NEXT: .LBB9_4: ; %endif +; GFX9GISEL-NEXT: .LBB5_4: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2419,7 +1649,7 @@ define 
amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr8_sgpr9 ; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064DAGISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1064DAGISEL-NEXT: s_mov_b64 s[8:9], exec ; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s8, s[8:9] @@ -2428,7 +1658,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s8 ; GFX1064DAGISEL-NEXT: s_mul_i32 s8, s2, s8 ; GFX1064DAGISEL-NEXT: s_add_u32 s9, s9, s3 -; GFX1064DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[4:5] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s8 @@ -2456,7 +1686,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2465,10 +1695,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s6 ; GFX1064GISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX1064GISEL-NEXT: s_add_u32 s7, s7, s3 -; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 
; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec @@ -2478,7 +1708,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: s_mul_i32 s7, s7, s4 ; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s4 ; GFX1064GISEL-NEXT: s_add_u32 s7, s5, s7 -; GFX1064GISEL-NEXT: .LBB9_4: ; %endif +; GFX1064GISEL-NEXT: .LBB5_4: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2495,7 +1725,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 @@ -2504,7 +1734,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX1032DAGISEL-NEXT: s_mul_i32 s4, s2, s4 ; GFX1032DAGISEL-NEXT: s_add_u32 s5, s5, s3 -; GFX1032DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s2, s8 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s4 @@ -2532,7 +1762,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s6, s6 @@ -2541,10 +1771,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; 
GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s6 ; GFX1032GISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX1032GISEL-NEXT: s_add_u32 s7, s7, s3 -; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo @@ -2554,7 +1784,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: s_mul_i32 s5, s7, s3 ; GFX1032GISEL-NEXT: s_mul_i32 s6, s6, s3 ; GFX1032GISEL-NEXT: s_add_u32 s7, s4, s5 -; GFX1032GISEL-NEXT: .LBB9_4: ; %endif +; GFX1032GISEL-NEXT: .LBB5_4: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2573,7 +1803,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 ; GFX1164DAGISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1164DAGISEL-NEXT: s_mov_b64 s[8:9], exec ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -2583,7 +1813,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s8 ; GFX1164DAGISEL-NEXT: s_mul_i32 s8, s2, s8 ; GFX1164DAGISEL-NEXT: s_add_u32 s9, s9, s3 -; GFX1164DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[6:7] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s8 @@ -2615,7 +1845,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr 
addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -2625,10 +1855,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s6 ; GFX1164GISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX1164GISEL-NEXT: s_add_u32 s7, s7, s3 -; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec @@ -2639,7 +1869,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_mul_i32 s5, s5, s6 ; GFX1164GISEL-NEXT: s_mul_i32 s6, s4, s6 ; GFX1164GISEL-NEXT: s_add_u32 s7, s7, s5 -; GFX1164GISEL-NEXT: .LBB9_4: ; %endif +; GFX1164GISEL-NEXT: .LBB5_4: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2658,7 +1888,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 ; GFX1132DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1132DAGISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -2668,7 
+1898,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s6 ; GFX1132DAGISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX1132DAGISEL-NEXT: s_add_u32 s7, s7, s3 -; GFX1132DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s2, s8 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 @@ -2698,7 +1928,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -2708,10 +1938,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s6 ; GFX1132GISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX1132GISEL-NEXT: s_add_u32 s7, s7, s3 -; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo @@ -2722,7 +1952,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_mul_i32 s5, s5, s3 ; GFX1132GISEL-NEXT: s_mul_i32 s6, s4, s3 ; GFX1132GISEL-NEXT: s_add_u32 s7, s7, s5 -; GFX1132GISEL-NEXT: .LBB9_4: ; %endif +; GFX1132GISEL-NEXT: .LBB5_4: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 
; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 @@ -2740,7 +1970,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX12DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 ; GFX12DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX12DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX12DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX12DAGISEL-NEXT: ; %bb.1: ; %else ; GFX12DAGISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -2750,7 +1980,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX12DAGISEL-NEXT: s_mul_i32 s3, s3, s6 ; GFX12DAGISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX12DAGISEL-NEXT: s_add_co_u32 s7, s7, s3 -; GFX12DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX12DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12DAGISEL-NEXT: s_or_saveexec_b32 s2, s8 ; GFX12DAGISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll index 08a978b0b34c1..f39dd867f9580 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll @@ -124,187 +124,19 @@ entry: ret void } -define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: 
v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: 
v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 123, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: poison_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v0 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v0 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: 
poison_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11DAGISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 poison, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8DAGISEL-LABEL: divergent_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; 
GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8DAGISEL-NEXT: s_and_b32 s4, s4, s6 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -318,13 +150,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_mov_b32 s4, -1 -; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8GISEL-NEXT: s_and_b32 s4, s4, s6 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -339,13 +171,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9DAGISEL-NEXT: s_and_b32 s4, s4, s6 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -357,13 +189,13 @@ define amdgpu_kernel void @divergent_value(ptr 
addrspace(1) %out) { ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_mov_b32 s4, -1 -; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9GISEL-NEXT: s_and_b32 s4, s4, s6 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -377,13 +209,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064DAGISEL-NEXT: s_and_b32 s4, s4, s6 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -395,13 +227,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_mov_b32 s4, -1 -; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; 
GFX1064GISEL-NEXT: s_and_b32 s4, s4, s6 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -415,13 +247,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, -1 -; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032DAGISEL-NEXT: s_and_b32 s2, s2, s5 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -433,13 +265,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s2, -1 -; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032GISEL-NEXT: s_and_b32 s2, s2, s5 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -454,14 +286,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164DAGISEL-NEXT: s_and_b32 s4, s4, s6 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -474,14 +306,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s4, -1 -; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164GISEL-NEXT: s_and_b32 s4, s4, s6 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -495,14 +327,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, -1 -; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: 
Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132DAGISEL-NEXT: s_and_b32 s2, s2, s5 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -515,14 +347,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s2, -1 -; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132GISEL-NEXT: s_and_b32 s2, s2, s5 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -550,20 +382,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX8DAGISEL-NEXT: ; %bb.3: ; %if ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s6, -1 -; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, 
s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8DAGISEL-NEXT: s_and_b32 s6, s6, s8 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX8DAGISEL-NEXT: ; %bb.5: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -578,26 +410,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b32 s6, s2 -; GFX8GISEL-NEXT: .LBB4_2: ; %Flow +; GFX8GISEL-NEXT: .LBB2_2: ; %Flow ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_mov_b32 s6, -1 -; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8GISEL-NEXT: s_and_b32 s6, s6, s8 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8GISEL-NEXT: .LBB4_5: ; %endif +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8GISEL-NEXT: .LBB2_5: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 @@ -621,20 +453,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX9DAGISEL-NEXT: ; %bb.3: ; %if ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s6, -1 -; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9DAGISEL-NEXT: s_and_b32 s6, s6, s8 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX9DAGISEL-NEXT: ; %bb.5: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -648,26 +480,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b32 s6, s2 -; GFX9GISEL-NEXT: .LBB4_2: ; %Flow +; GFX9GISEL-NEXT: .LBB2_2: ; %Flow ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_mov_b64 
s[2:3], exec ; GFX9GISEL-NEXT: s_mov_b32 s6, -1 -; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9GISEL-NEXT: s_and_b32 s6, s6, s8 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9GISEL-NEXT: .LBB4_5: ; %endif +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9GISEL-NEXT: .LBB2_5: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -690,20 +522,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s6, -1 -; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064DAGISEL-NEXT: s_and_b32 s6, s6, s8 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1064DAGISEL-NEXT: ; %bb.5: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -717,26 +549,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) 
%out, i32 %in) { ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_mov_b32 s6, -1 -; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064GISEL-NEXT: s_and_b32 s6, s6, s8 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064GISEL-NEXT: .LBB4_5: ; %endif +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064GISEL-NEXT: .LBB2_5: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -759,20 +591,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s1, -1 -; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner 
Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032DAGISEL-NEXT: s_and_b32 s1, s1, s6 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1032DAGISEL-NEXT: ; %bb.5: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -786,26 +618,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s0, -1 -; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032GISEL-NEXT: s_and_b32 s0, s0, s6 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; 
GFX1032GISEL-NEXT: .LBB4_5: ; %endif +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032GISEL-NEXT: .LBB2_5: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -830,21 +662,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s6, -1 -; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164DAGISEL-NEXT: s_and_b32 s6, s6, s8 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1164DAGISEL-NEXT: ; %bb.5: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -860,27 +692,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c ; 
GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s6, -1 -; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164GISEL-NEXT: s_and_b32 s6, s6, s8 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164GISEL-NEXT: .LBB4_5: ; %endif +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164GISEL-NEXT: .LBB2_5: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -905,21 +737,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s1, -1 -; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; 
GFX1132DAGISEL-NEXT: s_and_b32 s1, s1, s6 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1132DAGISEL-NEXT: ; %bb.5: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -935,27 +767,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s0, -1 -; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_and_b32 s0, s0, s6 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132GISEL-NEXT: .LBB4_5: ; %endif +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132GISEL-NEXT: .LBB2_5: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; 
GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 @@ -1087,198 +919,20 @@ entry: ret void } -define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: 
v_mov_b32_e32 v0, 0x7b -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value_i64: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0x7b 
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 123, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: poison_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX10DAGISEL-NEXT: 
s_endpgm -; -; GFX10GISEL-LABEL: poison_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value_i64: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11DAGISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1] -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value_i64: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1] -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 poison, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-LABEL: divergent_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX8DAGISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1291,14 +945,14 @@ define void 
@divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX8GISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX8GISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX8GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1311,14 +965,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX9DAGISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1331,14 +985,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; 
GFX9GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX9GISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX9GISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX9GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1351,14 +1005,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX1064DAGISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1370,14 +1024,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX1064GISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX1064GISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX1064GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] ; GFX1064GISEL-NEXT: 
s_cmp_lg_u64 s[6:7], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1389,14 +1043,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1032DAGISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s9, v3, s7 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032DAGISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1408,14 +1062,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032GISEL-NEXT: v_readlane_b32 s9, v3, s7 ; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9] ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1427,7 +1081,7 @@ define void 
@divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], -1 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s6, s[2:3] ; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s6 @@ -1435,7 +1089,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s6 ; GFX1164DAGISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5] ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1447,7 +1101,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], -1 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s6, s[2:3] ; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s6 @@ -1455,7 +1109,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s6 ; GFX1164GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5] ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0 
@@ -1467,7 +1121,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1132DAGISEL-NEXT: s_mov_b64 s[0:1], -1 ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -1475,7 +1129,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5] ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1486,7 +1140,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[0:1], -1 ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -1494,7 +1148,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5] ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 
v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1535,19 +1189,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX8GISEL-NEXT: .LBB9_2: ; %Flow +; GFX8GISEL-NEXT: .LBB5_2: ; %Flow ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX8GISEL-NEXT: .LBB9_4: ; %endif +; GFX8GISEL-NEXT: .LBB5_4: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1584,19 +1238,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9GISEL-NEXT: .LBB9_2: ; %Flow +; GFX9GISEL-NEXT: .LBB5_2: ; %Flow ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX9GISEL-NEXT: .LBB9_4: ; %endif +; GFX9GISEL-NEXT: .LBB5_4: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1633,19 +1287,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1064GISEL-NEXT: .LBB9_4: ; %endif +; GFX1064GISEL-NEXT: .LBB5_4: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1682,19 +1336,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: 
s_andn2_saveexec_b32 s2, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1032GISEL-NEXT: .LBB9_4: ; %endif +; GFX1032GISEL-NEXT: .LBB5_4: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1735,19 +1389,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1164GISEL-NEXT: .LBB9_4: ; %endif +; GFX1164GISEL-NEXT: .LBB5_4: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1786,19 +1440,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2 ; 
GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1132GISEL-NEXT: .LBB9_4: ; %endif +; GFX1132GISEL-NEXT: .LBB5_4: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 @@ -1822,3 +1476,6 @@ endif: store i64 %combine, ptr addrspace(1) %out ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX11DAGISEL: {{.*}} +; GFX11GISEL: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll index ace65a03a5abb..6f299ab8bb9cf 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll @@ -124,187 +124,19 @@ entry: ret void } -define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; 
GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; 
GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 123, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: poison_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v0 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v0 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, 
v0, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11DAGISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 poison, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8DAGISEL-LABEL: divergent_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_brev_b32 s4, 1 -; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; 
GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8DAGISEL-NEXT: s_max_i32 s4, s4, s6 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -318,13 +150,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_brev_b32 s4, 1 -; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8GISEL-NEXT: s_max_i32 s4, s4, s6 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -339,13 +171,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_brev_b32 s4, 1 -; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9DAGISEL-NEXT: s_max_i32 s4, s4, s6 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ 
-357,13 +189,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_brev_b32 s4, 1 -; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9GISEL-NEXT: s_max_i32 s4, s4, s6 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -377,13 +209,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_brev_b32 s4, 1 -; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064DAGISEL-NEXT: s_max_i32 s4, s4, s6 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -395,13 +227,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_brev_b32 s4, 1 -; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s6, 
v0, s5 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064GISEL-NEXT: s_max_i32 s4, s4, s6 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -415,13 +247,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032DAGISEL-NEXT: s_brev_b32 s2, 1 -; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032DAGISEL-NEXT: s_max_i32 s2, s2, s5 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -433,13 +265,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032GISEL-NEXT: s_brev_b32 s2, 1 -; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032GISEL-NEXT: s_max_i32 s2, s2, s5 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -454,14 +286,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) 
%out) { ; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_brev_b32 s4, 1 -; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164DAGISEL-NEXT: s_max_i32 s4, s4, s6 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -474,14 +306,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_brev_b32 s4, 1 -; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164GISEL-NEXT: s_max_i32 s4, s4, s6 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -495,14 +327,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132DAGISEL-NEXT: s_brev_b32 s2, 1 -; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; 
GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132DAGISEL-NEXT: s_max_i32 s2, s2, s5 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -515,14 +347,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132GISEL-NEXT: s_brev_b32 s2, 1 -; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132GISEL-NEXT: s_max_i32 s2, s2, s5 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -550,20 +382,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX8DAGISEL-NEXT: ; %bb.3: ; %if ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_brev_b32 s6, 1 -; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner 
Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8DAGISEL-NEXT: s_max_i32 s6, s6, s8 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX8DAGISEL-NEXT: ; %bb.5: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -578,26 +410,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b32 s6, s2 -; GFX8GISEL-NEXT: .LBB4_2: ; %Flow +; GFX8GISEL-NEXT: .LBB2_2: ; %Flow ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_brev_b32 s6, 1 -; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8GISEL-NEXT: s_max_i32 s6, s6, s8 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8GISEL-NEXT: .LBB4_5: ; %endif +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8GISEL-NEXT: .LBB2_5: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, 
s[0:1] ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 @@ -621,20 +453,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX9DAGISEL-NEXT: ; %bb.3: ; %if ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_brev_b32 s6, 1 -; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9DAGISEL-NEXT: s_max_i32 s6, s6, s8 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX9DAGISEL-NEXT: ; %bb.5: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -648,26 +480,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b32 s6, s2 -; GFX9GISEL-NEXT: .LBB4_2: ; %Flow +; GFX9GISEL-NEXT: .LBB2_2: ; %Flow ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5 ; 
GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_brev_b32 s6, 1 -; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9GISEL-NEXT: s_max_i32 s6, s6, s8 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9GISEL-NEXT: .LBB4_5: ; %endif +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9GISEL-NEXT: .LBB2_5: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -690,20 +522,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_brev_b32 s6, 1 -; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064DAGISEL-NEXT: s_max_i32 s6, s6, s8 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1064DAGISEL-NEXT: ; %bb.5: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -717,26 +549,26 
@@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_brev_b32 s6, 1 -; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064GISEL-NEXT: s_max_i32 s6, s6, s8 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064GISEL-NEXT: .LBB4_5: ; %endif +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064GISEL-NEXT: .LBB2_5: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -759,20 +591,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032DAGISEL-NEXT: 
s_brev_b32 s1, 1 -; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032DAGISEL-NEXT: s_max_i32 s1, s1, s6 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1032DAGISEL-NEXT: ; %bb.5: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -786,26 +618,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032GISEL-NEXT: s_brev_b32 s0, 1 -; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032GISEL-NEXT: s_max_i32 s0, s0, s6 ; GFX1032GISEL-NEXT: 
s_cmp_lg_u32 s2, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032GISEL-NEXT: .LBB4_5: ; %endif +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032GISEL-NEXT: .LBB2_5: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -830,21 +662,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_brev_b32 s6, 1 -; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164DAGISEL-NEXT: s_max_i32 s6, s6, s8 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1164DAGISEL-NEXT: ; %bb.5: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -860,27 +692,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1164GISEL-NEXT: ; %bb.1: 
; %else ; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c ; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_brev_b32 s6, 1 -; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164GISEL-NEXT: s_max_i32 s6, s6, s8 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164GISEL-NEXT: .LBB4_5: ; %endif +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164GISEL-NEXT: .LBB2_5: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -905,21 +737,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_brev_b32 s1, 1 -; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, 
s3 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_max_i32 s1, s1, s6 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1132DAGISEL-NEXT: ; %bb.5: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -935,27 +767,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_brev_b32 s0, 1 -; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_max_i32 s0, s0, s6 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132GISEL-NEXT: .LBB4_5: ; %endif +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132GISEL-NEXT: .LBB2_5: ; %endif ; 
GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 @@ -1087,184 +919,6 @@ entry: ret void } -define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 
s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value_i64: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; 
GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1132GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 123, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: poison_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dwordx2 v0, 
v[0:1], s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value_i64: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11DAGISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1] -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value_i64: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1] -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 poison, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-LABEL: divergent_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry @@ -1272,7 +926,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX8DAGISEL-NEXT: s_brev_b32 s5, 1 ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1283,7 +937,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX8DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8DAGISEL-NEXT: 
s_cbranch_scc1 .LBB4_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1297,7 +951,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_mov_b32 s4, 0 ; GFX8GISEL-NEXT: s_brev_b32 s5, 1 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1308,7 +962,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX8GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1322,7 +976,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX9DAGISEL-NEXT: s_brev_b32 s5, 1 ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1333,7 +987,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX9DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1347,7 +1001,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; 
GFX9GISEL-NEXT: s_mov_b32 s4, 0 ; GFX9GISEL-NEXT: s_brev_b32 s5, 1 ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1358,7 +1012,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX9GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1372,7 +1026,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX1064DAGISEL-NEXT: s_brev_b32 s5, 1 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1383,7 +1037,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX1064DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1396,7 +1050,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 ; GFX1064GISEL-NEXT: s_brev_b32 s5, 1 ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: 
Depth=1 +; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1407,7 +1061,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX1064GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1420,7 +1074,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0 ; GFX1032DAGISEL-NEXT: s_brev_b32 s5, 1 ; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1431,7 +1085,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1444,7 +1098,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_mov_b32 s4, 0 ; GFX1032GISEL-NEXT: s_brev_b32 s5, 1 ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: 
v_mov_b32_e32 v4, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1455,7 +1109,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1468,7 +1122,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_mov_b32 s0, 0 ; GFX1164DAGISEL-NEXT: s_brev_b32 s1, 1 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v4, s0 @@ -1480,7 +1134,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s8 ; GFX1164DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1493,7 +1147,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_mov_b32 s0, 0 ; GFX1164GISEL-NEXT: s_brev_b32 s1, 1 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; 
GFX1164GISEL-NEXT: v_mov_b32_e32 v4, s0 @@ -1505,7 +1159,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s8 ; GFX1164GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1518,7 +1172,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0 ; GFX1132DAGISEL-NEXT: s_brev_b32 s1, 1 ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 @@ -1529,7 +1183,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1541,7 +1195,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 ; GFX1132GISEL-NEXT: s_brev_b32 s1, 1 ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; 
GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 @@ -1552,7 +1206,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1593,19 +1247,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX8GISEL-NEXT: .LBB9_2: ; %Flow +; GFX8GISEL-NEXT: .LBB5_2: ; %Flow ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX8GISEL-NEXT: .LBB9_4: ; %endif +; GFX8GISEL-NEXT: .LBB5_4: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1642,19 +1296,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2 ; 
GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9GISEL-NEXT: .LBB9_2: ; %Flow +; GFX9GISEL-NEXT: .LBB5_2: ; %Flow ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX9GISEL-NEXT: .LBB9_4: ; %endif +; GFX9GISEL-NEXT: .LBB5_4: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1691,19 +1345,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1064GISEL-NEXT: .LBB9_4: ; %endif +; GFX1064GISEL-NEXT: .LBB5_4: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1740,19 +1394,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: 
; implicit-def: $sgpr6_sgpr7 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1032GISEL-NEXT: .LBB9_4: ; %endif +; GFX1032GISEL-NEXT: .LBB5_4: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1793,19 +1447,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1164GISEL-NEXT: .LBB9_4: ; %endif +; GFX1164GISEL-NEXT: .LBB5_4: ; 
%endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1844,19 +1498,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1132GISEL-NEXT: .LBB9_4: ; %endif +; GFX1132GISEL-NEXT: .LBB5_4: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 @@ -1880,3 +1534,6 @@ endif: store i64 %combine, ptr addrspace(1) %out ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; GFX11DAGISEL: {{.*}} +; GFX11GISEL: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll index b12537eb0cebe..3c4cbc74aedc1 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll @@ -124,187 +124,19 @@ entry: ret void } -define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; 
GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 123, i32 1) - 
store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: poison_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v0 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v0 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: 
poison_value: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11DAGISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 poison, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8DAGISEL-LABEL: divergent_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_brev_b32 s4, -2 -; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8DAGISEL-NEXT: s_min_i32 s4, s4, s6 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -318,13 +150,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_brev_b32 s4, -2 -; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8GISEL-NEXT: s_bitset0_b64 
s[2:3], s5 ; GFX8GISEL-NEXT: s_min_i32 s4, s4, s6 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -339,13 +171,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_brev_b32 s4, -2 -; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9DAGISEL-NEXT: s_min_i32 s4, s4, s6 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -357,13 +189,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_brev_b32 s4, -2 -; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9GISEL-NEXT: s_min_i32 s4, s4, s6 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -377,13 +209,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: 
s_brev_b32 s4, -2 -; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064DAGISEL-NEXT: s_min_i32 s4, s4, s6 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -395,13 +227,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_brev_b32 s4, -2 -; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064GISEL-NEXT: s_min_i32 s4, s4, s6 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -415,13 +247,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032DAGISEL-NEXT: s_brev_b32 s2, -2 -; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032DAGISEL-NEXT: s_min_i32 s2, s2, s5 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; 
GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -433,13 +265,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032GISEL-NEXT: s_brev_b32 s2, -2 -; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032GISEL-NEXT: s_min_i32 s2, s2, s5 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -454,14 +286,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_brev_b32 s4, -2 -; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164DAGISEL-NEXT: s_min_i32 s4, s4, s6 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -474,14 +306,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1164GISEL-NEXT: 
v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_brev_b32 s4, -2 -; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164GISEL-NEXT: s_min_i32 s4, s4, s6 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -495,14 +327,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132DAGISEL-NEXT: s_brev_b32 s2, -2 -; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132DAGISEL-NEXT: s_min_i32 s2, s2, s5 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -515,14 +347,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132GISEL-NEXT: s_brev_b32 s2, -2 -; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop 
Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132GISEL-NEXT: s_min_i32 s2, s2, s5 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -550,20 +382,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX8DAGISEL-NEXT: ; %bb.3: ; %if ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_brev_b32 s6, -2 -; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8DAGISEL-NEXT: s_min_i32 s6, s6, s8 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX8DAGISEL-NEXT: ; %bb.5: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -578,26 +410,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX8GISEL-NEXT: 
s_cbranch_execz .LBB2_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b32 s6, s2 -; GFX8GISEL-NEXT: .LBB4_2: ; %Flow +; GFX8GISEL-NEXT: .LBB2_2: ; %Flow ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_brev_b32 s6, -2 -; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8GISEL-NEXT: s_min_i32 s6, s6, s8 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8GISEL-NEXT: .LBB4_5: ; %endif +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8GISEL-NEXT: .LBB2_5: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 @@ -621,20 +453,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX9DAGISEL-NEXT: ; %bb.3: ; %if ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_brev_b32 s6, -2 -; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9DAGISEL-NEXT: s_min_i32 s6, s6, s8 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: 
s_cbranch_scc1 .LBB4_4 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX9DAGISEL-NEXT: ; %bb.5: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -648,26 +480,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b32 s6, s2 -; GFX9GISEL-NEXT: .LBB4_2: ; %Flow +; GFX9GISEL-NEXT: .LBB2_2: ; %Flow ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_brev_b32 s6, -2 -; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9GISEL-NEXT: s_min_i32 s6, s6, s8 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9GISEL-NEXT: .LBB4_5: ; %endif +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9GISEL-NEXT: .LBB2_5: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -690,20 +522,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: 
v_mov_b32_e32 v1, s2 ; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_brev_b32 s6, -2 -; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064DAGISEL-NEXT: s_min_i32 s6, s6, s8 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1064DAGISEL-NEXT: ; %bb.5: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -717,26 +549,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_brev_b32 s6, -2 -; GFX1064GISEL-NEXT: 
.LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064GISEL-NEXT: s_min_i32 s6, s6, s8 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064GISEL-NEXT: .LBB4_5: ; %endif +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064GISEL-NEXT: .LBB2_5: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -759,20 +591,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032DAGISEL-NEXT: s_brev_b32 s1, -2 -; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032DAGISEL-NEXT: s_min_i32 s1, s1, s6 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1032DAGISEL-NEXT: ; %bb.5: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -786,26 +618,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; 
GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032GISEL-NEXT: s_brev_b32 s0, -2 -; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032GISEL-NEXT: s_min_i32 s0, s0, s6 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032GISEL-NEXT: .LBB4_5: ; %endif +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032GISEL-NEXT: .LBB2_5: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -830,21 +662,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_brev_b32 s6, -2 -; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: 
.LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164DAGISEL-NEXT: s_min_i32 s6, s6, s8 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1164DAGISEL-NEXT: ; %bb.5: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -860,27 +692,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c ; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_brev_b32 s6, -2 -; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164GISEL-NEXT: s_min_i32 
s6, s6, s8 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164GISEL-NEXT: .LBB4_5: ; %endif +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164GISEL-NEXT: .LBB2_5: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -905,21 +737,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_brev_b32 s1, -2 -; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_min_i32 s1, s1, s6 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1132DAGISEL-NEXT: ; %bb.5: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -935,27 +767,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2 ; 
GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_brev_b32 s0, -2 -; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_min_i32 s0, s0, s6 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132GISEL-NEXT: .LBB4_5: ; %endif +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132GISEL-NEXT: .LBB2_5: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 @@ -1087,184 +919,6 @@ entry: ret void } -define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; 
GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164DAGISEL-NEXT: 
v_mov_b32_e32 v1, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value_i64: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1132GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 123, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: poison_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; 
GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value_i64: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11DAGISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1] -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value_i64: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0) -; 
GFX11GISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1] -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 poison, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-LABEL: divergent_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry @@ -1272,7 +926,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_mov_b32 s4, -1 ; GFX8DAGISEL-NEXT: s_brev_b32 s5, -2 ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1283,7 +937,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX8DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1297,7 +951,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_mov_b32 s4, -1 ; GFX8GISEL-NEXT: s_brev_b32 s5, -2 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1308,7 +962,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX8GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; 
GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1322,7 +976,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1 ; GFX9DAGISEL-NEXT: s_brev_b32 s5, -2 ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1333,7 +987,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX9DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1347,7 +1001,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_mov_b32 s4, -1 ; GFX9GISEL-NEXT: s_brev_b32 s5, -2 ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1358,7 +1012,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX9GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1372,7 +1026,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, 
i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1 ; GFX1064DAGISEL-NEXT: s_brev_b32 s5, -2 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1383,7 +1037,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX1064DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1396,7 +1050,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_mov_b32 s4, -1 ; GFX1064GISEL-NEXT: s_brev_b32 s5, -2 ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1407,7 +1061,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX1064GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1420,7 +1074,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_mov_b32 s4, -1 ; GFX1032DAGISEL-NEXT: s_brev_b32 s5, -2 ; GFX1032DAGISEL-NEXT: 
s_mov_b32 s6, exec_lo -; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1431,7 +1085,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1444,7 +1098,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_mov_b32 s4, -1 ; GFX1032GISEL-NEXT: s_brev_b32 s5, -2 ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1455,7 +1109,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1468,7 +1122,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_mov_b32 s0, -1 ; GFX1164DAGISEL-NEXT: s_brev_b32 s1, -2 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: 
Depth=1 ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v4, s0 @@ -1480,7 +1134,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s8 ; GFX1164DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1493,7 +1147,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_mov_b32 s0, -1 ; GFX1164GISEL-NEXT: s_brev_b32 s1, -2 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v4, s0 @@ -1505,7 +1159,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s8 ; GFX1164GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1518,7 +1172,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_mov_b32 s0, -1 ; GFX1132DAGISEL-NEXT: s_brev_b32 s1, -2 ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; 
GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 @@ -1529,7 +1183,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1541,7 +1195,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_mov_b32 s0, -1 ; GFX1132GISEL-NEXT: s_brev_b32 s1, -2 ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 @@ -1552,7 +1206,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1593,19 +1247,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8GISEL-NEXT: 
s_cbranch_execz .LBB9_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX8GISEL-NEXT: .LBB9_2: ; %Flow +; GFX8GISEL-NEXT: .LBB5_2: ; %Flow ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX8GISEL-NEXT: .LBB9_4: ; %endif +; GFX8GISEL-NEXT: .LBB5_4: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1642,19 +1296,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9GISEL-NEXT: .LBB9_2: ; %Flow +; GFX9GISEL-NEXT: .LBB5_2: ; %Flow ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX9GISEL-NEXT: .LBB9_4: ; %endif +; GFX9GISEL-NEXT: .LBB5_4: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1691,19 +1345,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: 
; implicit-def: $sgpr6_sgpr7 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1064GISEL-NEXT: .LBB9_4: ; %endif +; GFX1064GISEL-NEXT: .LBB5_4: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1740,19 +1394,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1032GISEL-NEXT: .LBB9_4: ; %endif +; GFX1032GISEL-NEXT: .LBB5_4: ; 
%endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1793,19 +1447,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1164GISEL-NEXT: .LBB9_4: ; %endif +; GFX1164GISEL-NEXT: .LBB5_4: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1844,19 +1498,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1132GISEL-NEXT: 
s_cbranch_execz .LBB5_4 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1132GISEL-NEXT: .LBB9_4: ; %endif +; GFX1132GISEL-NEXT: .LBB5_4: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 @@ -1880,3 +1534,6 @@ endif: store i64 %combine, ptr addrspace(1) %out ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX11DAGISEL: {{.*}} +; GFX11GISEL: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll index 99b998b5ed177..d6ccf7ce2831d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll @@ -124,187 +124,19 @@ entry: ret void } -define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; 
GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: 
v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 123, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: poison_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v0 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v0 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value: -; 
GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v0, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11DAGISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GISEL-NEXT: global_store_b32 v0, v0, s[0:1] -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 poison, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8DAGISEL-LABEL: divergent_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8DAGISEL-NEXT: s_or_b32 s4, s4, s6 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; 
GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -318,13 +150,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_mov_b32 s4, 0 -; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8GISEL-NEXT: s_or_b32 s4, s4, s6 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -339,13 +171,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9DAGISEL-NEXT: s_or_b32 s4, s4, s6 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -357,13 +189,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, 
s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9GISEL-NEXT: s_or_b32 s4, s4, s6 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -377,13 +209,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064DAGISEL-NEXT: s_or_b32 s4, s4, s6 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -395,13 +227,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064GISEL-NEXT: s_or_b32 s4, s4, s6 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -415,13 +247,13 @@ define amdgpu_kernel 
void @divergent_value(ptr addrspace(1) %out) { ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032DAGISEL-NEXT: s_or_b32 s2, s2, s5 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -433,13 +265,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032GISEL-NEXT: s_or_b32 s2, s2, s5 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -454,14 +286,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | 
instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164DAGISEL-NEXT: s_or_b32 s4, s4, s6 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -474,14 +306,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164GISEL-NEXT: s_or_b32 s4, s4, s6 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -495,14 +327,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132DAGISEL-NEXT: s_or_b32 s2, s2, s5 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132DAGISEL-NEXT: 
s_cbranch_scc1 .LBB3_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -515,14 +347,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132GISEL-NEXT: s_or_b32 s2, s2, s5 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -550,20 +382,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX8DAGISEL-NEXT: ; %bb.3: ; %if ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8DAGISEL-NEXT: s_or_b32 s6, s6, s8 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX8DAGISEL-NEXT: ; %bb.5: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 
s6 -; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -578,26 +410,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b32 s6, s2 -; GFX8GISEL-NEXT: .LBB4_2: ; %Flow +; GFX8GISEL-NEXT: .LBB2_2: ; %Flow ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_mov_b32 s6, 0 -; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8GISEL-NEXT: s_or_b32 s6, s6, s8 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8GISEL-NEXT: .LBB4_5: ; %endif +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8GISEL-NEXT: .LBB2_5: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 @@ -621,20 +453,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX9DAGISEL-NEXT: 
s_cbranch_execz .LBB2_6 ; GFX9DAGISEL-NEXT: ; %bb.3: ; %if ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9DAGISEL-NEXT: s_or_b32 s6, s6, s8 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX9DAGISEL-NEXT: ; %bb.5: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -648,26 +480,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b32 s6, s2 -; GFX9GISEL-NEXT: .LBB4_2: ; %Flow +; GFX9GISEL-NEXT: .LBB2_2: ; %Flow ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9GISEL-NEXT: s_or_b32 s6, 
s6, s8 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9GISEL-NEXT: .LBB4_5: ; %endif +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9GISEL-NEXT: .LBB2_5: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -690,20 +522,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064DAGISEL-NEXT: s_or_b32 s6, s6, s8 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1064DAGISEL-NEXT: ; %bb.5: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -717,26 +549,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_load_dword s2, 
s[4:5], 0x2c ; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064GISEL-NEXT: s_or_b32 s6, s6, s8 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064GISEL-NEXT: .LBB4_5: ; %endif +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064GISEL-NEXT: .LBB2_5: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -759,20 +591,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032DAGISEL-NEXT: s_or_b32 s1, s1, s6 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032DAGISEL-NEXT: 
s_cbranch_scc1 .LBB4_4 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1032DAGISEL-NEXT: ; %bb.5: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -786,26 +618,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032GISEL-NEXT: s_or_b32 s0, s0, s6 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032GISEL-NEXT: .LBB4_5: ; %endif +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032GISEL-NEXT: .LBB2_5: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -830,21 +662,21 @@ define amdgpu_kernel void @divergent_cfg(ptr 
addrspace(1) %out, i32 %in) { ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164DAGISEL-NEXT: s_or_b32 s6, s6, s8 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1164DAGISEL-NEXT: ; %bb.5: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -860,27 +692,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c ; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1164GISEL-NEXT: s_cbranch_execz 
.LBB2_5 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164GISEL-NEXT: s_or_b32 s6, s6, s8 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164GISEL-NEXT: .LBB4_5: ; %endif +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164GISEL-NEXT: .LBB2_5: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -905,21 +737,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_or_b32 s1, s1, s6 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1132DAGISEL-NEXT: ; %bb.5: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif ; 
GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -935,27 +767,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_or_b32 s0, s0, s6 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132GISEL-NEXT: .LBB4_5: ; %endif +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132GISEL-NEXT: .LBB2_5: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 @@ -1087,198 +919,20 @@ entry: ret void } -define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 
s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: 
s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value_i64: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1132GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 123, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - 
-define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: poison_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: 
poison_value_i64: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11DAGISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1] -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value_i64: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11GISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1] -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 poison, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-LABEL: divergent_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX8DAGISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1291,14 +945,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: 
s_ff1_i32_b64 s10, s[6:7] ; GFX8GISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX8GISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX8GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1311,14 +965,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX9DAGISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1331,14 +985,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX9GISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX9GISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX9GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; 
GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1351,14 +1005,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX1064DAGISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1370,14 +1024,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX1064GISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX1064GISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX1064GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1389,14 +1043,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) ; GFX1032DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s9, v3, s7 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032DAGISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1408,14 +1062,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032GISEL-NEXT: v_readlane_b32 s9, v3, s7 ; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1427,7 +1081,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: 
s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s6, s[2:3] ; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s6 @@ -1435,7 +1089,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s6 ; GFX1164DAGISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1447,7 +1101,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s6, s[2:3] ; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s6 @@ -1455,7 +1109,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s6 ; GFX1164GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1467,7 +1121,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1132DAGISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: 
Depth=1 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -1475,7 +1129,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1486,7 +1140,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -1494,7 +1148,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1536,19 +1190,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz 
.LBB9_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX8GISEL-NEXT: .LBB9_2: ; %Flow +; GFX8GISEL-NEXT: .LBB5_2: ; %Flow ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX8GISEL-NEXT: .LBB9_4: ; %endif +; GFX8GISEL-NEXT: .LBB5_4: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1585,19 +1239,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9GISEL-NEXT: .LBB9_2: ; %Flow +; GFX9GISEL-NEXT: .LBB5_2: ; %Flow ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX9GISEL-NEXT: .LBB9_4: ; %endif +; GFX9GISEL-NEXT: .LBB5_4: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1634,19 +1288,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: ; implicit-def: 
$sgpr6_sgpr7 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1064GISEL-NEXT: .LBB9_4: ; %endif +; GFX1064GISEL-NEXT: .LBB5_4: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1683,19 +1337,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1032GISEL-NEXT: .LBB9_4: ; %endif +; GFX1032GISEL-NEXT: .LBB5_4: ; %endif ; 
GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1736,19 +1390,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1164GISEL-NEXT: .LBB9_4: ; %endif +; GFX1164GISEL-NEXT: .LBB5_4: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1787,19 +1441,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1132GISEL-NEXT: s_cbranch_execz 
.LBB5_4 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1132GISEL-NEXT: .LBB9_4: ; %endif +; GFX1132GISEL-NEXT: .LBB5_4: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 @@ -1823,3 +1477,6 @@ endif: store i64 %combine, ptr addrspace(1) %out ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX11DAGISEL: {{.*}} +; GFX11GISEL: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll index bc8bf7f4b3b6f..fab269ea8cfb9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll @@ -215,1989 +215,1081 @@ entry: ret void } -define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value: +define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { +; GFX8DAGISEL-LABEL: divergent_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_sub_i32 s4, 0, 0x7b -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s4, s2 +; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX8DAGISEL-NEXT: s_sub_i32 s4, s4, s6 +; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: 
v_mov_b32_e32 v2, s2 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8DAGISEL-NEXT: s_endpgm ; -; GFX8GISEL-LABEL: const_value: +; GFX8GISEL-LABEL: divergent_value: ; GFX8GISEL: ; %bb.0: ; %entry ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX8GISEL-NEXT: s_mul_i32 s2, s3, s2 +; GFX8GISEL-NEXT: s_mov_b32 s4, 0 +; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX8GISEL-NEXT: s_sub_i32 s4, s4, s6 +; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8GISEL-NEXT: s_endpgm ; -; GFX9DAGISEL-LABEL: const_value: +; GFX9DAGISEL-LABEL: divergent_value: ; GFX9DAGISEL: ; %bb.0: ; %entry ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX9DAGISEL-NEXT: s_sub_i32 s4, s4, s6 +; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX9DAGISEL-NEXT: ; %bb.2: +; 
GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; -; GFX9GISEL-LABEL: const_value: +; GFX9GISEL-LABEL: divergent_value: ; GFX9GISEL: ; %bb.0: ; %entry ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX9GISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX9GISEL-NEXT: s_sub_i32 s4, s4, s6 +; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX9GISEL-NEXT: ; %bb.2: +; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: const_value: +; GFX1064DAGISEL-LABEL: divergent_value: ; GFX1064DAGISEL: ; %bb.0: ; %entry ; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1064DAGISEL-NEXT: s_sub_i32 s4, s4, s6 +; GFX1064DAGISEL-NEXT: 
s_cmp_lg_u64 s[2:3], 0 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1064DAGISEL-NEXT: ; %bb.2: +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1064DAGISEL-NEXT: s_endpgm ; -; GFX1064GISEL-LABEL: const_value: +; GFX1064GISEL-LABEL: divergent_value: ; GFX1064GISEL: ; %bb.0: ; %entry ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 +; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1064GISEL-NEXT: s_sub_i32 s4, s4, s6 +; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1064GISEL-NEXT: ; %bb.2: +; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX1064GISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1064GISEL-NEXT: s_endpgm ; -; GFX1032DAGISEL-LABEL: const_value: +; GFX1032DAGISEL-LABEL: divergent_value: ; GFX1032DAGISEL: ; %bb.0: ; %entry ; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0 +; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: 
Depth=1 +; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 +; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1032DAGISEL-NEXT: s_sub_i32 s2, s2, s5 +; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1032DAGISEL-NEXT: ; %bb.2: +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1032DAGISEL-NEXT: s_endpgm ; -; GFX1032GISEL-LABEL: const_value: +; GFX1032GISEL-LABEL: divergent_value: ; GFX1032GISEL: ; %bb.0: ; %entry ; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_mul_i32 s2, s3, s2 +; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1032GISEL-NEXT: s_mov_b32 s2, 0 +; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 +; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1032GISEL-NEXT: s_sub_i32 s2, s2, s5 +; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1032GISEL-NEXT: s_endpgm ; -; GFX1164DAGISEL-LABEL: const_value: +; GFX1164DAGISEL-LABEL: divergent_value: ; GFX1164DAGISEL: ; %bb.0: ; %entry ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, 
s[2:3] -; GFX1164DAGISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1164DAGISEL-NEXT: s_sub_i32 s4, s4, s6 +; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1164DAGISEL-NEXT: ; %bb.2: +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1164DAGISEL-NEXT: s_endpgm ; -; GFX1164GISEL-LABEL: const_value: +; GFX1164GISEL-LABEL: divergent_value: ; GFX1164GISEL: ; %bb.0: ; %entry ; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164GISEL-NEXT: s_mov_b32 s4, 0 +; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] +; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1164GISEL-NEXT: s_sub_i32 s4, s4, s6 +; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1164GISEL-NEXT: ; %bb.2: +; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164GISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | 
instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1164GISEL-NEXT: s_endpgm ; -; GFX1132DAGISEL-LABEL: const_value: +; GFX1132DAGISEL-LABEL: divergent_value: ; GFX1132DAGISEL: ; %bb.0: ; %entry ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0 +; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 +; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1132DAGISEL-NEXT: s_sub_i32 s2, s2, s5 +; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1132DAGISEL-NEXT: ; %bb.2: +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1132DAGISEL-NEXT: s_endpgm ; -; GFX1132GISEL-LABEL: const_value: +; GFX1132GISEL-LABEL: divergent_value: ; GFX1132GISEL: ; %bb.0: ; %entry ; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: s_sub_i32 s3, 0, 0x7b -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1132GISEL-NEXT: s_mul_i32 s2, s3, s2 -; 
GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1132GISEL-NEXT: s_mov_b32 s2, 0 +; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 +; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1132GISEL-NEXT: s_sub_i32 s2, s2, s5 +; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1132GISEL-NEXT: ; %bb.2: +; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1132GISEL-NEXT: s_endpgm ; -; GFX12DAGISEL-LABEL: const_value: +; GFX12DAGISEL-LABEL: divergent_value: ; GFX12DAGISEL: ; %bb.0: ; %entry ; GFX12DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX12DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX12DAGISEL-NEXT: s_sub_co_i32 s3, 0, 0x7b -; GFX12DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX12DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX12DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX12DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX12DAGISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX12DAGISEL-NEXT: s_mov_b32 s2, 0 +; GFX12DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX12DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 +; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe +; GFX12DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX12DAGISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX12DAGISEL-NEXT: s_sub_co_i32 s2, s2, s5 +; GFX12DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX12DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX12DAGISEL-NEXT: ; %bb.2: +; GFX12DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX12DAGISEL-NEXT: 
s_wait_kmcnt 0x0 -; GFX12DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX12DAGISEL-NEXT: s_endpgm entry: - %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 123, i32 1) + %id.x = call i32 @llvm.amdgcn.workitem.id.x() + %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %id.x, i32 1) store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: poison_value: +define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { +; GFX8DAGISEL-LABEL: divergent_cfg: ; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 +; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX8DAGISEL-NEXT: ; %bb.1: ; %else +; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_sub_i32 s4, 0, s0 -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s4, s2 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 +; GFX8DAGISEL-NEXT: s_sub_i32 s3, 0, s6 +; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, s2 +; GFX8DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX8DAGISEL-NEXT: ; %bb.3: ; %if +; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: 
s_ff1_i32_b64 s7, s[2:3] +; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX8DAGISEL-NEXT: s_sub_i32 s6, s6, s8 +; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8DAGISEL-NEXT: ; %bb.5: +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX8DAGISEL-NEXT: flat_store_dword v[2:3], v1 ; GFX8DAGISEL-NEXT: s_endpgm ; -; GFX8GISEL-LABEL: poison_value: +; GFX8GISEL-LABEL: divergent_cfg: ; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 +; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX8GISEL-NEXT: ; %bb.1: ; %else +; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_sub_i32 s3, 0, s0 -; GFX8GISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_sub_i32 s3, 0, s0 +; GFX8GISEL-NEXT: s_sub_i32 s3, 0, s6 +; GFX8GISEL-NEXT: s_mul_i32 
s6, s3, s2 +; GFX8GISEL-NEXT: .LBB2_2: ; %Flow +; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX8GISEL-NEXT: ; %bb.3: ; %if +; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX8GISEL-NEXT: s_mov_b32 s6, 0 +; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX8GISEL-NEXT: s_sub_i32 s6, s6, s8 +; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8GISEL-NEXT: .LBB2_5: ; %endif +; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 +; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 +; GFX8GISEL-NEXT: s_endpgm +; +; GFX9DAGISEL-LABEL: divergent_cfg: +; GFX9DAGISEL: ; %bb.0: ; %entry +; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 +; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX9DAGISEL-NEXT: ; %bb.1: ; %else +; GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c +; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0 +; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9DAGISEL-NEXT: s_sub_i32 s3, 0, s6 ; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, s2 +; GFX9DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX9DAGISEL-NEXT: ; %bb.3: ; %if +; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX9DAGISEL-NEXT: .LBB2_4: ; 
=>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX9DAGISEL-NEXT: s_sub_i32 s6, s6, s8 +; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9DAGISEL-NEXT: ; %bb.5: +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; -; GFX9GISEL-LABEL: poison_value: +; GFX9GISEL-LABEL: divergent_cfg: ; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 +; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX9GISEL-NEXT: ; %bb.1: ; %else +; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 +; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9GISEL-NEXT: s_sub_i32 s3, 0, s6 +; GFX9GISEL-NEXT: s_mul_i32 s6, s3, s2 +; GFX9GISEL-NEXT: .LBB2_2: ; %Flow +; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX9GISEL-NEXT: ; %bb.3: ; %if +; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX9GISEL-NEXT: s_mov_b32 s6, 0 +; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX9GISEL-NEXT: s_sub_i32 s6, s6, s8 +; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9GISEL-NEXT: 
.LBB2_5: ; %endif +; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_sub_i32 s3, 0, s0 -; GFX9GISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: poison_value: +; GFX1064DAGISEL-LABEL: divergent_cfg: ; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 +; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_sub_i32 s3, 0, s0 +; GFX1064DAGISEL-NEXT: s_sub_i32 s3, 0, s6 ; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s3, s2 +; GFX1064DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1064DAGISEL-NEXT: s_sub_i32 s6, s6, s8 +; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 
s[2:3], 0 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064DAGISEL-NEXT: ; %bb.5: +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX1064DAGISEL-NEXT: s_endpgm ; -; GFX1064GISEL-LABEL: poison_value: +; GFX1064GISEL-LABEL: divergent_cfg: ; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 +; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1064GISEL-NEXT: ; %bb.1: ; %else +; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_sub_i32 s3, 0, s0 -; GFX1064GISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1064GISEL-NEXT: s_sub_i32 s3, 0, s6 +; GFX1064GISEL-NEXT: s_mul_i32 s6, s3, s2 +; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1064GISEL-NEXT: ; %bb.3: ; %if +; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064GISEL-NEXT: s_mov_b32 s6, 0 +; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1064GISEL-NEXT: s_sub_i32 s6, s6, s8 +; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064GISEL-NEXT: 
s_cbranch_scc1 .LBB2_4 +; GFX1064GISEL-NEXT: .LBB2_5: ; %endif +; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 +; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1064GISEL-NEXT: s_endpgm ; -; GFX1032DAGISEL-LABEL: poison_value: +; GFX1032DAGISEL-LABEL: divergent_cfg: ; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0 +; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1 +; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo +; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_sub_i32 s3, 0, s0 -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1032DAGISEL-NEXT: s_sub_i32 s1, 0, s1 +; GFX1032DAGISEL-NEXT: s_mul_i32 s1, s1, s2 +; GFX1032DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0 +; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 +; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1032DAGISEL-NEXT: s_sub_i32 s1, s1, s6 +; 
GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032DAGISEL-NEXT: ; %bb.5: +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX1032DAGISEL-NEXT: s_endpgm ; -; GFX1032GISEL-LABEL: poison_value: +; GFX1032GISEL-LABEL: divergent_cfg: ; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1032GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v0 +; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 +; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo +; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1032GISEL-NEXT: ; %bb.1: ; %else +; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_sub_i32 s3, 0, s0 -; GFX1032GISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX1032GISEL-NEXT: s_sub_i32 s0, 0, s0 +; GFX1032GISEL-NEXT: s_mul_i32 s0, s0, s2 +; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1032GISEL-NEXT: ; %bb.3: ; %if +; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032GISEL-NEXT: s_mov_b32 s0, 0 +; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 +; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1032GISEL-NEXT: s_sub_i32 s0, s0, s6 +; 
GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032GISEL-NEXT: .LBB2_5: ; %endif +; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3] ; GFX1032GISEL-NEXT: s_endpgm ; -; GFX1164DAGISEL-LABEL: poison_value: +; GFX1164DAGISEL-LABEL: divergent_cfg: ; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec +; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 +; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0 +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_sub_i32 s3, 0, s0 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1164DAGISEL-NEXT: s_sub_i32 s3, 0, s6 ; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s3, s2 +; GFX1164DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0 
+; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1164DAGISEL-NEXT: s_sub_i32 s6, s6, s8 +; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164DAGISEL-NEXT: ; %bb.5: +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX1164DAGISEL-NEXT: s_endpgm ; -; GFX1164GISEL-LABEL: poison_value: +; GFX1164GISEL-LABEL: divergent_cfg: ; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec +; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 +; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1164GISEL-NEXT: ; %bb.1: ; %else +; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 +; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_sub_i32 s3, 0, s0 -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1164GISEL-NEXT: s_sub_i32 s3, 0, s6 
+; GFX1164GISEL-NEXT: s_mul_i32 s6, s3, s2 +; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1164GISEL-NEXT: ; %bb.3: ; %if +; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164GISEL-NEXT: s_mov_b32 s6, 0 +; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] +; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1164GISEL-NEXT: s_sub_i32 s6, s6, s8 +; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164GISEL-NEXT: .LBB2_5: ; %endif +; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 +; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1164GISEL-NEXT: s_endpgm ; -; GFX1132DAGISEL-LABEL: poison_value: +; GFX1132DAGISEL-LABEL: divergent_cfg: ; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo +; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1 +; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 +; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_sub_i32 s3, 0, 
s0 -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s3, s2 +; GFX1132DAGISEL-NEXT: s_sub_i32 s1, 0, s1 +; GFX1132DAGISEL-NEXT: s_mul_i32 s1, s1, s2 +; GFX1132DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0 +; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1132DAGISEL-NEXT: s_sub_i32 s1, s1, s6 +; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132DAGISEL-NEXT: ; %bb.5: +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX1132DAGISEL-NEXT: s_endpgm ; -; GFX1132GISEL-LABEL: poison_value: +; GFX1132GISEL-LABEL: divergent_cfg: ; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo +; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0 +; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 +; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1132GISEL-NEXT: ; %bb.1: ; %else +; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX1132GISEL-NEXT: s_mov_b32 s2, 
exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 +; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_sub_i32 s3, 0, s0 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1132GISEL-NEXT: s_sub_i32 s0, 0, s0 +; GFX1132GISEL-NEXT: s_mul_i32 s0, s0, s2 +; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1132GISEL-NEXT: ; %bb.3: ; %if +; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 +; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 +; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1132GISEL-NEXT: s_sub_i32 s0, s0, s6 +; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132GISEL-NEXT: .LBB2_5: ; %endif +; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 +; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 +; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3] ; GFX1132GISEL-NEXT: s_endpgm ; -; GFX12DAGISEL-LABEL: poison_value: +; GFX12DAGISEL-LABEL: divergent_cfg: ; GFX12DAGISEL: ; %bb.0: ; %entry -; GFX12DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX12DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12DAGISEL-NEXT: s_mov_b32 s0, exec_lo +; GFX12DAGISEL-NEXT: ; implicit-def: $sgpr1 +; GFX12DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; 
GFX12DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 +; GFX12DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 +; GFX12DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX12DAGISEL-NEXT: ; %bb.1: ; %else +; GFX12DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c ; GFX12DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX12DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12DAGISEL-NEXT: s_sub_co_i32 s3, 0, s0 -; GFX12DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX12DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12DAGISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 poison, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: divergent_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX8DAGISEL-NEXT: s_sub_i32 s4, s4, s6 -; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX8DAGISEL-NEXT: ; %bb.2: -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: divergent_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_mov_b32 s4, 0 -; 
GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX8GISEL-NEXT: s_sub_i32 s4, s4, s6 -; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX8GISEL-NEXT: ; %bb.2: -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: divergent_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX9DAGISEL-NEXT: s_sub_i32 s4, s4, s6 -; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX9DAGISEL-NEXT: ; %bb.2: -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: divergent_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX9GISEL-NEXT: s_sub_i32 s4, s4, s6 -; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX9GISEL-NEXT: ; %bb.2: -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 
0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: divergent_value: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1064DAGISEL-NEXT: s_sub_i32 s4, s4, s6 -; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1064DAGISEL-NEXT: ; %bb.2: -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: divergent_value: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1064GISEL-NEXT: s_sub_i32 s4, s4, s6 -; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1064GISEL-NEXT: ; %bb.2: -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: divergent_value: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032DAGISEL-NEXT: s_mov_b32 s3, 
exec_lo -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 -; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1032DAGISEL-NEXT: s_sub_i32 s2, s2, s5 -; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1032DAGISEL-NEXT: ; %bb.2: -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: divergent_value: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1032GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 -; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1032GISEL-NEXT: s_sub_i32 s2, s2, s5 -; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1032GISEL-NEXT: ; %bb.2: -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: divergent_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1164DAGISEL-NEXT: 
s_bitset0_b64 s[2:3], s5 -; GFX1164DAGISEL-NEXT: s_sub_i32 s4, s4, s6 -; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1164DAGISEL-NEXT: ; %bb.2: -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: divergent_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1164GISEL-NEXT: s_sub_i32 s4, s4, s6 -; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1164GISEL-NEXT: ; %bb.2: -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: divergent_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1132DAGISEL-NEXT: s_sub_i32 s2, s2, s5 -; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; 
GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1132DAGISEL-NEXT: ; %bb.2: -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: divergent_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1132GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 -; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1132GISEL-NEXT: s_sub_i32 s2, s2, s5 -; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1132GISEL-NEXT: ; %bb.2: -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -; -; GFX12DAGISEL-LABEL: divergent_value: -; GFX12DAGISEL: ; %bb.0: ; %entry -; GFX12DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX12DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX12DAGISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX12DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX12DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX12DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 -; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe -; GFX12DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX12DAGISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX12DAGISEL-NEXT: s_sub_co_i32 s2, s2, s5 -; GFX12DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX12DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX12DAGISEL-NEXT: ; %bb.2: -; GFX12DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; 
GFX12DAGISEL-NEXT: s_endpgm -entry: - %id.x = call i32 @llvm.amdgcn.workitem.id.x() - %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %id.x, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: divergent_cfg: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 -; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX8DAGISEL-NEXT: ; %bb.1: ; %else -; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_sub_i32 s3, 0, s6 -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX8DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX8DAGISEL-NEXT: ; %bb.3: ; %if -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX8DAGISEL-NEXT: s_sub_i32 s6, s6, s8 -; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8DAGISEL-NEXT: ; %bb.5: -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: 
flat_store_dword v[2:3], v1 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: divergent_cfg: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 -; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX8GISEL-NEXT: ; %bb.1: ; %else -; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_sub_i32 s3, 0, s6 -; GFX8GISEL-NEXT: s_mul_i32 s6, s3, s2 -; GFX8GISEL-NEXT: .LBB4_2: ; %Flow -; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX8GISEL-NEXT: ; %bb.3: ; %if -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_mov_b32 s6, 0 -; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX8GISEL-NEXT: s_sub_i32 s6, s6, s8 -; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8GISEL-NEXT: .LBB4_5: ; %endif -; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: divergent_cfg: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 -; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX9DAGISEL-NEXT: ; %bb.1: ; %else -; 
GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_sub_i32 s3, 0, s6 -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX9DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX9DAGISEL-NEXT: ; %bb.3: ; %if -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX9DAGISEL-NEXT: s_sub_i32 s6, s6, s8 -; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9DAGISEL-NEXT: ; %bb.5: -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: divergent_cfg: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 -; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX9GISEL-NEXT: ; %bb.1: ; %else -; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_sub_i32 s3, 0, s6 -; GFX9GISEL-NEXT: 
s_mul_i32 s6, s3, s2 -; GFX9GISEL-NEXT: .LBB4_2: ; %Flow -; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX9GISEL-NEXT: ; %bb.3: ; %if -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX9GISEL-NEXT: s_sub_i32 s6, s6, s8 -; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9GISEL-NEXT: .LBB4_5: ; %endif -; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: divergent_cfg: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 -; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_sub_i32 s3, 0, s6 -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1064DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: 
s_mov_b32 s6, 0 -; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1064DAGISEL-NEXT: s_sub_i32 s6, s6, s8 -; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064DAGISEL-NEXT: ; %bb.5: -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: divergent_cfg: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 -; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1064GISEL-NEXT: ; %bb.1: ; %else -; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_sub_i32 s3, 0, s6 -; GFX1064GISEL-NEXT: s_mul_i32 s6, s3, s2 -; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1064GISEL-NEXT: ; %bb.3: ; %if -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1064GISEL-NEXT: s_sub_i32 s6, s6, s8 -; 
GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064GISEL-NEXT: .LBB4_5: ; %endif -; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: divergent_cfg: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0 -; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1 -; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo -; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_sub_i32 s1, 0, s1 -; GFX1032DAGISEL-NEXT: s_mul_i32 s1, s1, s2 -; GFX1032DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 -; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1032DAGISEL-NEXT: s_sub_i32 s1, s1, s6 -; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032DAGISEL-NEXT: ; %bb.5: -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif -; 
GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: divergent_cfg: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v0 -; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 -; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo -; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1032GISEL-NEXT: ; %bb.1: ; %else -; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_sub_i32 s0, 0, s0 -; GFX1032GISEL-NEXT: s_mul_i32 s0, s0, s2 -; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1032GISEL-NEXT: ; %bb.3: ; %if -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 -; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1032GISEL-NEXT: s_sub_i32 s0, s0, s6 -; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032GISEL-NEXT: .LBB4_5: ; %endif -; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: divergent_cfg: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; 
GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec -; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 -; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_sub_i32 s3, 0, s6 -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s3, s2 -; GFX1164DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1164DAGISEL-NEXT: s_sub_i32 s6, s6, s8 -; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164DAGISEL-NEXT: ; %bb.5: -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; 
GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: divergent_cfg: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec -; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 -; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1164GISEL-NEXT: ; %bb.1: ; %else -; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_sub_i32 s3, 0, s6 -; GFX1164GISEL-NEXT: s_mul_i32 s6, s3, s2 -; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1164GISEL-NEXT: ; %bb.3: ; %if -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1164GISEL-NEXT: s_sub_i32 s6, s6, s8 -; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164GISEL-NEXT: .LBB4_5: ; %endif -; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: divergent_cfg: -; GFX1132DAGISEL: ; %bb.0: ; 
%entry -; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo -; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 -; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_sub_i32 s1, 0, s1 -; GFX1132DAGISEL-NEXT: s_mul_i32 s1, s1, s2 -; GFX1132DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1132DAGISEL-NEXT: s_sub_i32 s1, s1, s6 -; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132DAGISEL-NEXT: ; %bb.5: -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; 
-; GFX1132GISEL-LABEL: divergent_cfg: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo -; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0 -; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 -; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1132GISEL-NEXT: ; %bb.1: ; %else -; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_sub_i32 s0, 0, s0 -; GFX1132GISEL-NEXT: s_mul_i32 s0, s0, s2 -; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1132GISEL-NEXT: ; %bb.3: ; %if -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1132GISEL-NEXT: s_sub_i32 s0, s0, s6 -; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132GISEL-NEXT: .LBB4_5: ; %endif -; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3] -; GFX1132GISEL-NEXT: s_endpgm -; -; GFX12DAGISEL-LABEL: divergent_cfg: -; GFX12DAGISEL: ; %bb.0: ; %entry -; GFX12DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX12DAGISEL-NEXT: s_mov_b32 
s0, exec_lo -; GFX12DAGISEL-NEXT: ; implicit-def: $sgpr1 -; GFX12DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 -; GFX12DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX12DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX12DAGISEL-NEXT: ; %bb.1: ; %else -; GFX12DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c -; GFX12DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX12DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX12DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12DAGISEL-NEXT: s_sub_co_i32 s1, 0, s1 -; GFX12DAGISEL-NEXT: s_mul_i32 s1, s1, s2 -; GFX12DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX12DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 -; GFX12DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX12DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX12DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX12DAGISEL-NEXT: ; %bb.3: ; %if -; GFX12DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX12DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX12DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX12DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 -; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe -; GFX12DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX12DAGISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX12DAGISEL-NEXT: s_sub_co_i32 s1, s1, s6 -; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe -; GFX12DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX12DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX12DAGISEL-NEXT: ; %bb.5: -; GFX12DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX12DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX12DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX12DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX12DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX12DAGISEL-NEXT: s_endpgm -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %d_cmp = icmp ult i32 %tid, 16 - br i1 %d_cmp, label %if, label %else - -if: - %reducedValTid = 
call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %tid, i32 1) - br label %endif - -else: - %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %in, i32 1) - br label %endif - -endif: - %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else] - store i32 %combine, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: uniform_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX8DAGISEL-NEXT: s_sub_i32 s4, 0, s4 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: s_ashr_i32 s0, s4, 31 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: s_mul_i32 s1, s2, s0 -; GFX8DAGISEL-NEXT: s_mul_i32 s0, s2, s4 -; GFX8DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s4 -; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX8DAGISEL-NEXT: s_add_u32 s2, s2, s3 -; GFX8DAGISEL-NEXT: s_add_u32 s1, s2, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: uniform_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX8GISEL-NEXT: s_sub_i32 s5, 0, s4 -; GFX8GISEL-NEXT: s_ashr_i32 s4, s5, 31 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s6, s2, s4 -; GFX8GISEL-NEXT: s_mul_i32 s4, s2, s5 -; GFX8GISEL-NEXT: s_mul_hi_u32 s2, s2, s5 -; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s5 -; GFX8GISEL-NEXT: s_add_u32 s2, s2, s3 -; GFX8GISEL-NEXT: s_add_u32 s5, s2, s6 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; 
GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: uniform_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX9DAGISEL-NEXT: s_sub_i32 s5, 0, s4 -; GFX9DAGISEL-NEXT: s_ashr_i32 s4, s5, 31 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s6, s2, s4 -; GFX9DAGISEL-NEXT: s_mul_i32 s4, s2, s5 -; GFX9DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s5 -; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s5 -; GFX9DAGISEL-NEXT: s_add_u32 s2, s2, s3 -; GFX9DAGISEL-NEXT: s_add_u32 s5, s2, s6 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: uniform_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX9GISEL-NEXT: s_sub_i32 s5, 0, s4 -; GFX9GISEL-NEXT: s_ashr_i32 s4, s5, 31 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s6, s2, s4 -; GFX9GISEL-NEXT: s_mul_i32 s4, s2, s5 -; GFX9GISEL-NEXT: s_mul_hi_u32 s2, s2, s5 -; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s5 -; GFX9GISEL-NEXT: s_add_u32 s2, s2, s3 -; GFX9GISEL-NEXT: s_add_u32 s5, s2, s6 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: uniform_value_i64: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064DAGISEL-NEXT: 
s_sub_i32 s4, 0, s4 -; GFX1064DAGISEL-NEXT: s_ashr_i32 s5, s4, 31 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4 -; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1064DAGISEL-NEXT: s_mul_i32 s5, s2, s5 -; GFX1064DAGISEL-NEXT: s_add_u32 s3, s6, s3 -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1064DAGISEL-NEXT: s_add_u32 s3, s3, s5 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: uniform_value_i64: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064GISEL-NEXT: s_sub_i32 s4, 0, s4 -; GFX1064GISEL-NEXT: s_ashr_i32 s5, s4, 31 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_hi_u32 s6, s2, s4 -; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1064GISEL-NEXT: s_mul_i32 s5, s2, s5 -; GFX1064GISEL-NEXT: s_add_u32 s3, s6, s3 -; GFX1064GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1064GISEL-NEXT: s_add_u32 s3, s3, s5 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: uniform_value_i64: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1032DAGISEL-NEXT: s_sub_i32 s4, 0, s4 -; GFX1032DAGISEL-NEXT: s_ashr_i32 s5, s4, 31 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4 -; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1032DAGISEL-NEXT: s_mul_i32 s5, s2, s5 -; GFX1032DAGISEL-NEXT: 
s_add_u32 s3, s6, s3 -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1032DAGISEL-NEXT: s_add_u32 s3, s3, s5 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: uniform_value_i64: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1032GISEL-NEXT: s_sub_i32 s4, 0, s4 -; GFX1032GISEL-NEXT: s_ashr_i32 s5, s4, 31 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_hi_u32 s6, s2, s4 -; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1032GISEL-NEXT: s_mul_i32 s5, s2, s5 -; GFX1032GISEL-NEXT: s_add_u32 s3, s6, s3 -; GFX1032GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1032GISEL-NEXT: s_add_u32 s3, s3, s5 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: uniform_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_sub_i32 s4, 0, s4 -; GFX1164DAGISEL-NEXT: s_ashr_i32 s5, s4, 31 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4 -; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1164DAGISEL-NEXT: s_mul_i32 s5, s2, s5 -; GFX1164DAGISEL-NEXT: s_add_u32 s3, s6, s3 -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1164DAGISEL-NEXT: s_add_u32 s3, s3, s5 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164DAGISEL-NEXT: 
v_mov_b32_e32 v1, s3 -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: uniform_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_sub_i32 s4, 0, s4 -; GFX1164GISEL-NEXT: s_ashr_i32 s5, s4, 31 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_hi_u32 s6, s2, s4 -; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1164GISEL-NEXT: s_mul_i32 s5, s2, s5 -; GFX1164GISEL-NEXT: s_add_u32 s3, s6, s3 -; GFX1164GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1164GISEL-NEXT: s_add_u32 s3, s3, s5 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: uniform_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1132DAGISEL-NEXT: s_sub_i32 s4, 0, s4 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_ashr_i32 s5, s4, 31 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4 -; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1132DAGISEL-NEXT: s_mul_i32 s5, s2, s5 -; GFX1132DAGISEL-NEXT: s_add_u32 s3, s6, s3 -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1132DAGISEL-NEXT: s_add_u32 s3, s3, s5 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 -; GFX1132DAGISEL-NEXT: 
global_store_b64 v2, v[0:1], s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: uniform_value_i64: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_sub_i32 s4, 0, s4 -; GFX1132GISEL-NEXT: s_ashr_i32 s5, s4, 31 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_hi_u32 s6, s2, s4 -; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1132GISEL-NEXT: s_mul_i32 s5, s2, s5 -; GFX1132GISEL-NEXT: s_add_u32 s3, s6, s3 -; GFX1132GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1132GISEL-NEXT: s_add_u32 s3, s3, s5 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -; -; GFX12DAGISEL-LABEL: uniform_value_i64: -; GFX12DAGISEL: ; %bb.0: ; %entry -; GFX12DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX12DAGISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX12DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX12DAGISEL-NEXT: s_sub_co_i32 s4, 0, s4 -; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12DAGISEL-NEXT: s_ashr_i32 s5, s4, 31 -; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4 -; GFX12DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX12DAGISEL-NEXT: s_mul_i32 s5, s2, s5 -; GFX12DAGISEL-NEXT: s_add_co_u32 s3, s6, s3 -; GFX12DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX12DAGISEL-NEXT: s_add_co_u32 s3, s3, s5 -; GFX12DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX12DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 -; GFX12DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX12DAGISEL-NEXT: 
s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 %in, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: s_sub_i32 s3, 0, s2 -; GFX8DAGISEL-NEXT: s_ashr_i32 s2, s3, 31 -; GFX8DAGISEL-NEXT: s_mul_i32 s4, s2, 0x7b -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX8DAGISEL-NEXT: s_mul_hi_u32 s5, 0x7b, s3 -; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX8DAGISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX8DAGISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: s_sub_i32 s3, 0, s2 -; GFX8GISEL-NEXT: s_ashr_i32 s2, s3, 31 -; GFX8GISEL-NEXT: s_mul_i32 s4, s2, 0x7b -; GFX8GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX8GISEL-NEXT: s_mul_hi_u32 s5, 0x7b, s3 -; GFX8GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX8GISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX8GISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: 
s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: s_sub_i32 s3, 0, s2 -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_ashr_i32 s2, s3, 31 -; GFX9DAGISEL-NEXT: s_mul_i32 s4, s2, 0x7b -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX9DAGISEL-NEXT: s_mul_hi_u32 s5, 0x7b, s3 -; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX9DAGISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX9DAGISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: s_sub_i32 s3, 0, s2 -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_ashr_i32 s2, s3, 31 -; GFX9GISEL-NEXT: s_mul_i32 s4, s2, 0x7b -; GFX9GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX9GISEL-NEXT: s_mul_hi_u32 s5, 0x7b, s3 -; GFX9GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX9GISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX9GISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: const_value_i64: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1064DAGISEL-NEXT: s_ashr_i32 s3, s2, 31 -; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2 -; GFX1064DAGISEL-NEXT: s_mul_i32 s5, s2, 0 -; 
GFX1064DAGISEL-NEXT: s_mulk_i32 s3, 0x7b -; GFX1064DAGISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1064DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1064DAGISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: const_value_i64: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1064GISEL-NEXT: s_ashr_i32 s3, s2, 31 -; GFX1064GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2 -; GFX1064GISEL-NEXT: s_mul_i32 s5, s2, 0 -; GFX1064GISEL-NEXT: s_mulk_i32 s3, 0x7b -; GFX1064GISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1064GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1064GISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: const_value_i64: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1032DAGISEL-NEXT: s_ashr_i32 s3, s2, 31 -; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2 -; GFX1032DAGISEL-NEXT: s_mul_i32 s5, s2, 0 -; GFX1032DAGISEL-NEXT: s_mulk_i32 s3, 0x7b -; GFX1032DAGISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1032DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1032DAGISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; 
GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: const_value_i64: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1032GISEL-NEXT: s_ashr_i32 s3, s2, 31 -; GFX1032GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2 -; GFX1032GISEL-NEXT: s_mul_i32 s5, s2, 0 -; GFX1032GISEL-NEXT: s_mulk_i32 s3, 0x7b -; GFX1032GISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1032GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1032GISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1164DAGISEL-NEXT: s_ashr_i32 s3, s2, 31 -; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2 -; GFX1164DAGISEL-NEXT: s_mul_i32 s5, s2, 0 -; GFX1164DAGISEL-NEXT: s_mulk_i32 s3, 0x7b -; GFX1164DAGISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1164DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1164DAGISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value_i64: -; 
GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1164GISEL-NEXT: s_ashr_i32 s3, s2, 31 -; GFX1164GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2 -; GFX1164GISEL-NEXT: s_mul_i32 s5, s2, 0 -; GFX1164GISEL-NEXT: s_mulk_i32 s3, 0x7b -; GFX1164GISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1164GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1164GISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_ashr_i32 s3, s2, 31 -; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2 -; GFX1132DAGISEL-NEXT: s_mul_i32 s5, s2, 0 -; GFX1132DAGISEL-NEXT: s_mulk_i32 s3, 0x7b -; GFX1132DAGISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1132DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1132DAGISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value_i64: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 
s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1132GISEL-NEXT: s_ashr_i32 s3, s2, 31 -; GFX1132GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2 -; GFX1132GISEL-NEXT: s_mul_i32 s5, s2, 0 -; GFX1132GISEL-NEXT: s_mulk_i32 s3, 0x7b -; GFX1132GISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1132GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1132GISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -; -; GFX12DAGISEL-LABEL: const_value_i64: -; GFX12DAGISEL: ; %bb.0: ; %entry -; GFX12DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX12DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12DAGISEL-NEXT: ; implicit-def: $vgpr0 +; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX12DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX12DAGISEL-NEXT: s_sub_co_i32 s2, 0, s2 -; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12DAGISEL-NEXT: s_ashr_i32 s3, s2, 31 -; GFX12DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2 -; GFX12DAGISEL-NEXT: s_mul_i32 s5, s2, 0 -; GFX12DAGISEL-NEXT: s_mulk_i32 s3, 0x7b -; GFX12DAGISEL-NEXT: s_add_co_u32 s4, s4, s5 -; GFX12DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX12DAGISEL-NEXT: s_add_co_u32 s3, s4, s3 -; GFX12DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX12DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 ; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX12DAGISEL-NEXT: s_sub_co_i32 s1, 0, s1 +; GFX12DAGISEL-NEXT: 
s_mul_i32 s1, s1, s2 +; GFX12DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX12DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 +; GFX12DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX12DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX12DAGISEL-NEXT: ; %bb.3: ; %if +; GFX12DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX12DAGISEL-NEXT: s_mov_b32 s1, 0 +; GFX12DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX12DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 +; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe +; GFX12DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX12DAGISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX12DAGISEL-NEXT: s_sub_co_i32 s1, s1, s6 +; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe +; GFX12DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX12DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX12DAGISEL-NEXT: ; %bb.5: +; GFX12DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX12DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX12DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX12DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX12DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX12DAGISEL-NEXT: s_endpgm entry: - %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 123, i32 1) - store i64 %result, ptr addrspace(1) %out + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %d_cmp = icmp ult i32 %tid, 16 + br i1 %d_cmp, label %if, label %else + +if: + %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %tid, i32 1) + br label %endif + +else: + %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %in, i32 1) + br label %endif + +endif: + %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else] + store i32 %combine, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: poison_value_i64: +define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) { +; GFX8DAGISEL-LABEL: uniform_value_i64: ; GFX8DAGISEL: ; 
%bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: s_sub_i32 s3, 0, s2 -; GFX8DAGISEL-NEXT: s_ashr_i32 s2, s3, 31 +; GFX8DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX8DAGISEL-NEXT: s_sub_i32 s4, 0, s4 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_mul_i32 s4, s0, s2 -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX8DAGISEL-NEXT: s_mul_hi_u32 s5, s0, s3 -; GFX8DAGISEL-NEXT: s_mul_i32 s3, s1, s3 -; GFX8DAGISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX8DAGISEL-NEXT: s_add_u32 s3, s3, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8DAGISEL-NEXT: s_ashr_i32 s0, s4, 31 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3 +; GFX8DAGISEL-NEXT: s_mul_i32 s1, s2, s0 +; GFX8DAGISEL-NEXT: s_mul_i32 s0, s2, s4 +; GFX8DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s4 +; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX8DAGISEL-NEXT: s_add_u32 s2, s2, s3 +; GFX8DAGISEL-NEXT: s_add_u32 s1, s2, s1 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 ; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; GFX8DAGISEL-NEXT: s_endpgm ; -; GFX8GISEL-LABEL: poison_value_i64: +; GFX8GISEL-LABEL: uniform_value_i64: ; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: s_sub_i32 s3, 0, s2 -; GFX8GISEL-NEXT: s_ashr_i32 s2, s3, 31 +; GFX8GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX8GISEL-NEXT: s_sub_i32 s5, 0, s4 +; GFX8GISEL-NEXT: s_ashr_i32 s4, s5, 31 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s4, s0, 
s2 -; GFX8GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX8GISEL-NEXT: s_mul_hi_u32 s5, s0, s3 -; GFX8GISEL-NEXT: s_mul_i32 s3, s1, s3 -; GFX8GISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX8GISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX8GISEL-NEXT: s_mul_i32 s6, s2, s4 +; GFX8GISEL-NEXT: s_mul_i32 s4, s2, s5 +; GFX8GISEL-NEXT: s_mul_hi_u32 s2, s2, s5 +; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s5 +; GFX8GISEL-NEXT: s_add_u32 s2, s2, s3 +; GFX8GISEL-NEXT: s_add_u32 s5, s2, s6 +; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s5 ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 ; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8GISEL-NEXT: s_endpgm ; -; GFX9DAGISEL-LABEL: poison_value_i64: +; GFX9DAGISEL-LABEL: uniform_value_i64: ; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: s_sub_i32 s3, 0, s2 -; GFX9DAGISEL-NEXT: s_ashr_i32 s2, s3, 31 +; GFX9DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX9DAGISEL-NEXT: s_sub_i32 s5, 0, s4 +; GFX9DAGISEL-NEXT: s_ashr_i32 s4, s5, 31 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s4, s0, s2 -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX9DAGISEL-NEXT: s_mul_hi_u32 s5, s0, s3 -; GFX9DAGISEL-NEXT: s_mul_i32 s3, s1, s3 -; GFX9DAGISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX9DAGISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9DAGISEL-NEXT: s_mul_i32 s6, s2, s4 +; GFX9DAGISEL-NEXT: s_mul_i32 s4, s2, s5 +; GFX9DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s5 +; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s5 +; GFX9DAGISEL-NEXT: s_add_u32 s2, s2, s3 +; GFX9DAGISEL-NEXT: s_add_u32 s5, s2, s6 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; 
GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s5 ; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; -; GFX9GISEL-LABEL: poison_value_i64: +; GFX9GISEL-LABEL: uniform_value_i64: ; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: s_sub_i32 s3, 0, s2 -; GFX9GISEL-NEXT: s_ashr_i32 s2, s3, 31 +; GFX9GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX9GISEL-NEXT: s_sub_i32 s5, 0, s4 +; GFX9GISEL-NEXT: s_ashr_i32 s4, s5, 31 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s4, s0, s2 -; GFX9GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX9GISEL-NEXT: s_mul_hi_u32 s5, s0, s3 -; GFX9GISEL-NEXT: s_mul_i32 s3, s1, s3 -; GFX9GISEL-NEXT: s_add_u32 s3, s5, s3 -; GFX9GISEL-NEXT: s_add_u32 s3, s3, s4 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX9GISEL-NEXT: s_mul_i32 s6, s2, s4 +; GFX9GISEL-NEXT: s_mul_i32 s4, s2, s5 +; GFX9GISEL-NEXT: s_mul_hi_u32 s2, s2, s5 +; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s5 +; GFX9GISEL-NEXT: s_add_u32 s2, s2, s3 +; GFX9GISEL-NEXT: s_add_u32 s5, s2, s6 +; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 +; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s5 ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: poison_value_i64: +; GFX1064DAGISEL-LABEL: uniform_value_i64: ; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 
s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1064DAGISEL-NEXT: s_ashr_i32 s3, s2, 31 +; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX1064DAGISEL-NEXT: s_sub_i32 s4, 0, s4 +; GFX1064DAGISEL-NEXT: s_ashr_i32 s5, s4, 31 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s4, s0, s2 -; GFX1064DAGISEL-NEXT: s_mul_i32 s5, s1, s2 -; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s0, s3 -; GFX1064DAGISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1064DAGISEL-NEXT: s_add_u32 s3, s4, s3 +; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4 +; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1064DAGISEL-NEXT: s_mul_i32 s5, s2, s5 +; GFX1064DAGISEL-NEXT: s_add_u32 s3, s6, s3 +; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1064DAGISEL-NEXT: s_add_u32 s3, s3, s5 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1064DAGISEL-NEXT: s_endpgm ; -; GFX1064GISEL-LABEL: poison_value_i64: +; GFX1064GISEL-LABEL: uniform_value_i64: ; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1064GISEL-NEXT: s_ashr_i32 s3, s2, 31 +; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX1064GISEL-NEXT: s_sub_i32 s4, 0, s4 +; GFX1064GISEL-NEXT: s_ashr_i32 s5, s4, 31 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_hi_u32 s4, s0, s2 -; GFX1064GISEL-NEXT: s_mul_i32 s5, s1, s2 -; GFX1064GISEL-NEXT: s_mul_i32 s3, s0, s3 -; GFX1064GISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1064GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1064GISEL-NEXT: s_add_u32 s3, s4, s3 +; GFX1064GISEL-NEXT: s_mul_hi_u32 s6, s2, s4 
+; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1064GISEL-NEXT: s_mul_i32 s5, s2, s5 +; GFX1064GISEL-NEXT: s_add_u32 s3, s6, s3 +; GFX1064GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1064GISEL-NEXT: s_add_u32 s3, s3, s5 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1064GISEL-NEXT: s_endpgm ; -; GFX1032DAGISEL-LABEL: poison_value_i64: +; GFX1032DAGISEL-LABEL: uniform_value_i64: ; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1032DAGISEL-NEXT: s_ashr_i32 s3, s2, 31 +; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 +; GFX1032DAGISEL-NEXT: s_sub_i32 s4, 0, s4 +; GFX1032DAGISEL-NEXT: s_ashr_i32 s5, s4, 31 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s4, s0, s2 -; GFX1032DAGISEL-NEXT: s_mul_i32 s5, s1, s2 -; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s0, s3 -; GFX1032DAGISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1032DAGISEL-NEXT: s_add_u32 s3, s4, s3 +; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4 +; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1032DAGISEL-NEXT: s_mul_i32 s5, s2, s5 +; GFX1032DAGISEL-NEXT: s_add_u32 s3, s6, s3 +; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1032DAGISEL-NEXT: s_add_u32 s3, s3, s5 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1032DAGISEL-NEXT: s_endpgm ; -; GFX1032GISEL-LABEL: poison_value_i64: +; GFX1032GISEL-LABEL: uniform_value_i64: ; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; 
GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1032GISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1032GISEL-NEXT: s_ashr_i32 s3, s2, 31 +; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 +; GFX1032GISEL-NEXT: s_sub_i32 s4, 0, s4 +; GFX1032GISEL-NEXT: s_ashr_i32 s5, s4, 31 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_hi_u32 s4, s0, s2 -; GFX1032GISEL-NEXT: s_mul_i32 s5, s1, s2 -; GFX1032GISEL-NEXT: s_mul_i32 s3, s0, s3 -; GFX1032GISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1032GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1032GISEL-NEXT: s_add_u32 s3, s4, s3 +; GFX1032GISEL-NEXT: s_mul_hi_u32 s6, s2, s4 +; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1032GISEL-NEXT: s_mul_i32 s5, s2, s5 +; GFX1032GISEL-NEXT: s_add_u32 s3, s6, s3 +; GFX1032GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1032GISEL-NEXT: s_add_u32 s3, s3, s5 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1032GISEL-NEXT: s_endpgm ; -; GFX1164DAGISEL-LABEL: poison_value_i64: +; GFX1164DAGISEL-LABEL: uniform_value_i64: ; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1164DAGISEL-NEXT: s_ashr_i32 s3, s2, 31 +; GFX1164DAGISEL-NEXT: s_sub_i32 s4, 0, s4 +; GFX1164DAGISEL-NEXT: s_ashr_i32 s5, s4, 31 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; 
GFX1164DAGISEL-NEXT: s_mul_hi_u32 s4, s0, s2 -; GFX1164DAGISEL-NEXT: s_mul_i32 s5, s1, s2 -; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s0, s3 -; GFX1164DAGISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1164DAGISEL-NEXT: s_add_u32 s3, s4, s3 +; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4 +; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1164DAGISEL-NEXT: s_mul_i32 s5, s2, s5 +; GFX1164DAGISEL-NEXT: s_add_u32 s3, s6, s3 +; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1164DAGISEL-NEXT: s_add_u32 s3, s3, s5 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1164DAGISEL-NEXT: s_endpgm ; -; GFX1164GISEL-LABEL: poison_value_i64: +; GFX1164GISEL-LABEL: uniform_value_i64: ; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1164GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1164GISEL-NEXT: s_ashr_i32 s3, s2, 31 +; GFX1164GISEL-NEXT: s_sub_i32 s4, 0, s4 +; GFX1164GISEL-NEXT: s_ashr_i32 s5, s4, 31 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_hi_u32 s4, s0, s2 -; GFX1164GISEL-NEXT: s_mul_i32 s5, s1, s2 -; GFX1164GISEL-NEXT: s_mul_i32 s3, s0, s3 -; GFX1164GISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1164GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1164GISEL-NEXT: s_add_u32 s3, s4, s3 +; GFX1164GISEL-NEXT: s_mul_hi_u32 s6, s2, s4 +; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1164GISEL-NEXT: s_mul_i32 s5, s2, s5 +; GFX1164GISEL-NEXT: s_add_u32 s3, s6, s3 +; GFX1164GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1164GISEL-NEXT: s_add_u32 
s3, s3, s5 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1164GISEL-NEXT: s_endpgm ; -; GFX1132DAGISEL-LABEL: poison_value_i64: +; GFX1132DAGISEL-LABEL: uniform_value_i64: ; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1132DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_sub_i32 s2, 0, s2 +; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 +; GFX1132DAGISEL-NEXT: s_sub_i32 s4, 0, s4 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_ashr_i32 s3, s2, 31 +; GFX1132DAGISEL-NEXT: s_ashr_i32 s5, s4, 31 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s4, s0, s2 -; GFX1132DAGISEL-NEXT: s_mul_i32 s5, s1, s2 -; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s0, s3 -; GFX1132DAGISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1132DAGISEL-NEXT: s_add_u32 s3, s4, s3 +; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4 +; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1132DAGISEL-NEXT: s_mul_i32 s5, s2, s5 +; GFX1132DAGISEL-NEXT: s_add_u32 s3, s6, s3 +; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1132DAGISEL-NEXT: s_add_u32 s3, s3, s5 ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 ; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1132DAGISEL-NEXT: s_endpgm ; -; GFX1132GISEL-LABEL: poison_value_i64: +; GFX1132GISEL-LABEL: uniform_value_i64: ; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132GISEL-NEXT: s_load_b128 s[0:3], 
s[4:5], 0x24 +; GFX1132GISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 +; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_sub_i32 s2, 0, s2 -; GFX1132GISEL-NEXT: s_ashr_i32 s3, s2, 31 +; GFX1132GISEL-NEXT: s_sub_i32 s4, 0, s4 +; GFX1132GISEL-NEXT: s_ashr_i32 s5, s4, 31 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_hi_u32 s4, s0, s2 -; GFX1132GISEL-NEXT: s_mul_i32 s5, s1, s2 -; GFX1132GISEL-NEXT: s_mul_i32 s3, s0, s3 -; GFX1132GISEL-NEXT: s_add_u32 s4, s4, s5 -; GFX1132GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1132GISEL-NEXT: s_add_u32 s3, s4, s3 +; GFX1132GISEL-NEXT: s_mul_hi_u32 s6, s2, s4 +; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX1132GISEL-NEXT: s_mul_i32 s5, s2, s5 +; GFX1132GISEL-NEXT: s_add_u32 s3, s6, s3 +; GFX1132GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1132GISEL-NEXT: s_add_u32 s3, s3, s5 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1132GISEL-NEXT: s_endpgm ; -; GFX12DAGISEL-LABEL: poison_value_i64: +; GFX12DAGISEL-LABEL: uniform_value_i64: ; GFX12DAGISEL: ; %bb.0: ; %entry -; GFX12DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX12DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX12DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX12DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX12DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX12DAGISEL-NEXT: s_sub_co_i32 s2, 0, s2 +; GFX12DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 +; GFX12DAGISEL-NEXT: s_sub_co_i32 s4, 0, s4 ; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12DAGISEL-NEXT: s_ashr_i32 s3, s2, 31 +; GFX12DAGISEL-NEXT: s_ashr_i32 s5, s4, 31 ; GFX12DAGISEL-NEXT: s_wait_kmcnt 
0x0 -; GFX12DAGISEL-NEXT: s_mul_hi_u32 s4, s0, s2 -; GFX12DAGISEL-NEXT: s_mul_i32 s5, s1, s2 -; GFX12DAGISEL-NEXT: s_mul_i32 s3, s0, s3 -; GFX12DAGISEL-NEXT: s_add_co_u32 s4, s4, s5 -; GFX12DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX12DAGISEL-NEXT: s_add_co_u32 s3, s4, s3 +; GFX12DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4 +; GFX12DAGISEL-NEXT: s_mul_i32 s3, s3, s4 +; GFX12DAGISEL-NEXT: s_mul_i32 s5, s2, s5 +; GFX12DAGISEL-NEXT: s_add_co_u32 s3, s6, s3 +; GFX12DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX12DAGISEL-NEXT: s_add_co_u32 s3, s3, s5 ; GFX12DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX12DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 ; GFX12DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX12DAGISEL-NEXT: s_endpgm entry: - %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 poison, i32 1) + %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 %in, i32 1) store i64 %result, ptr addrspace(1) %out ret void } @@ -2208,7 +1300,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX8DAGISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -2216,7 +1308,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s8 ; GFX8DAGISEL-NEXT: s_subb_u32 s5, s5, s10 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2229,7 +1321,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX8GISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX8GISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -2237,7 +1329,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s8 ; GFX8GISEL-NEXT: s_subb_u32 s5, s5, s10 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2250,7 +1342,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX9DAGISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -2258,7 +1350,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s8 ; GFX9DAGISEL-NEXT: s_subb_u32 s5, s5, s10 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2271,7 +1363,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB4_1: 
; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX9GISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX9GISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -2279,7 +1371,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s8 ; GFX9GISEL-NEXT: s_subb_u32 s5, s5, s10 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2292,7 +1384,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v2, s8 ; GFX1064DAGISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -2300,7 +1392,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_sub_u32 s4, s4, s9 ; GFX1064DAGISEL-NEXT: s_subb_u32 s5, s5, s10 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2312,7 +1404,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7] ; GFX1064GISEL-NEXT: v_readlane_b32 s9, 
v2, s8 ; GFX1064GISEL-NEXT: v_readlane_b32 s10, v3, s8 @@ -2320,7 +1412,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_sub_u32 s4, s4, s9 ; GFX1064GISEL-NEXT: s_subb_u32 s5, s5, s10 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2332,7 +1424,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1032DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s9, v3, s7 @@ -2340,7 +1432,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_sub_u32 s4, s4, s8 ; GFX1032DAGISEL-NEXT: s_subb_u32 s5, s5, s9 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2352,7 +1444,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032GISEL-NEXT: v_readlane_b32 s9, v3, s7 @@ -2360,7 +1452,7 @@ define void 
@divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_sub_u32 s4, s4, s8 ; GFX1032GISEL-NEXT: s_subb_u32 s5, s5, s9 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2372,7 +1464,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s4, s[2:3] ; GFX1164DAGISEL-NEXT: v_readlane_b32 s5, v2, s4 @@ -2381,7 +1473,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_sub_u32 s0, s0, s5 ; GFX1164DAGISEL-NEXT: s_subb_u32 s1, s1, s6 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -2393,7 +1485,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s4, s[2:3] ; GFX1164GISEL-NEXT: v_readlane_b32 s5, v2, s4 @@ -2402,7 +1494,7 @@ define void 
@divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_sub_u32 s0, s0, s5 ; GFX1164GISEL-NEXT: s_subb_u32 s1, s1, s6 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -2414,7 +1506,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1132DAGISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -2423,7 +1515,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_sub_u32 s0, s0, s4 ; GFX1132DAGISEL-NEXT: s_subb_u32 s1, s1, s5 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -2434,7 +1526,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ 
-2443,7 +1535,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_sub_u32 s0, s0, s4 ; GFX1132GISEL-NEXT: s_subb_u32 s1, s1, s5 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -2458,7 +1550,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12DAGISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX12DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX12DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX12DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe ; GFX12DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe @@ -2468,7 +1560,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe ; GFX12DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 ; GFX12DAGISEL-NEXT: s_sub_nc_u64 s[0:1], s[0:1], s[4:5] -; GFX12DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX12DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX12DAGISEL-NEXT: ; %bb.2: ; GFX12DAGISEL-NEXT: s_wait_alu 0xfffe ; GFX12DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 @@ -2489,7 +1581,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8DAGISEL-NEXT: ; %bb.1: ; %else ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2502,13 +1594,13 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, s7 ; 
GFX8DAGISEL-NEXT: s_add_u32 s2, s2, s3 ; GFX8DAGISEL-NEXT: s_add_u32 s7, s2, s10 -; GFX8DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX8DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9] ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s7 ; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[2:3] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX8DAGISEL-NEXT: ; %bb.3: ; %if ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2522,7 +1614,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8DAGISEL-NEXT: s_add_u32 s7, s4, s8 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s7 -; GFX8DAGISEL-NEXT: .LBB9_4: ; %endif +; GFX8DAGISEL-NEXT: .LBB5_4: ; %endif ; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -2536,7 +1628,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2549,10 +1641,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s7 ; GFX8GISEL-NEXT: s_add_u32 s2, s2, s3 ; GFX8GISEL-NEXT: s_add_u32 s7, s2, s10 -; GFX8GISEL-NEXT: .LBB9_2: ; %Flow +; GFX8GISEL-NEXT: .LBB5_2: ; %Flow ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; 
GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec @@ -2566,7 +1658,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: s_mul_i32 s5, s5, s7 ; GFX8GISEL-NEXT: s_add_u32 s4, s4, s5 ; GFX8GISEL-NEXT: s_add_u32 s7, s4, s8 -; GFX8GISEL-NEXT: .LBB9_4: ; %endif +; GFX8GISEL-NEXT: .LBB5_4: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -2583,7 +1675,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9DAGISEL-NEXT: ; %bb.1: ; %else ; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] @@ -2596,13 +1688,13 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s5 ; GFX9DAGISEL-NEXT: s_add_u32 s2, s2, s3 ; GFX9DAGISEL-NEXT: s_add_u32 s5, s2, s10 -; GFX9DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX9DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s5 ; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[2:3] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX9DAGISEL-NEXT: ; %bb.3: ; %if ; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] @@ -2616,7 +1708,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9DAGISEL-NEXT: s_add_u32 s5, s5, s8 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9DAGISEL-NEXT: .LBB9_4: ; %endif +; 
GFX9DAGISEL-NEXT: .LBB5_4: ; %endif ; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] @@ -2629,7 +1721,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2642,10 +1734,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s7 ; GFX9GISEL-NEXT: s_add_u32 s2, s2, s3 ; GFX9GISEL-NEXT: s_add_u32 s7, s2, s10 -; GFX9GISEL-NEXT: .LBB9_2: ; %Flow +; GFX9GISEL-NEXT: .LBB5_2: ; %Flow ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34 ; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec @@ -2659,7 +1751,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: s_mul_i32 s5, s8, s5 ; GFX9GISEL-NEXT: s_add_u32 s4, s7, s4 ; GFX9GISEL-NEXT: s_add_u32 s7, s4, s5 -; GFX9GISEL-NEXT: .LBB9_4: ; %endif +; GFX9GISEL-NEXT: .LBB5_4: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2676,7 +1768,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr8_sgpr9 ; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064DAGISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064DAGISEL-NEXT: 
s_cbranch_execz .LBB5_2 ; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1064DAGISEL-NEXT: s_mov_b64 s[8:9], exec ; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s8, s[8:9] @@ -2689,7 +1781,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064DAGISEL-NEXT: s_add_u32 s3, s10, s3 ; GFX1064DAGISEL-NEXT: s_mul_i32 s8, s2, s8 ; GFX1064DAGISEL-NEXT: s_add_u32 s9, s3, s9 -; GFX1064DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[4:5] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s8 @@ -2721,7 +1813,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2734,10 +1826,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: s_add_u32 s3, s10, s3 ; GFX1064GISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX1064GISEL-NEXT: s_add_u32 s7, s3, s7 -; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec @@ -2751,7 +1843,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: s_add_u32 s7, s8, s7 ; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s4 ; GFX1064GISEL-NEXT: s_add_u32 s7, s7, s5 -; GFX1064GISEL-NEXT: .LBB9_4: 
; %endif +; GFX1064GISEL-NEXT: .LBB5_4: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2768,7 +1860,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 @@ -2781,7 +1873,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032DAGISEL-NEXT: s_add_u32 s3, s9, s3 ; GFX1032DAGISEL-NEXT: s_mul_i32 s4, s2, s4 ; GFX1032DAGISEL-NEXT: s_add_u32 s5, s3, s5 -; GFX1032DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s2, s8 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s4 @@ -2813,7 +1905,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s6, s6 @@ -2826,10 +1918,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: s_add_u32 s3, s9, s3 ; GFX1032GISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX1032GISEL-NEXT: s_add_u32 s7, s3, s7 -; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8 -; 
GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo @@ -2843,7 +1935,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: s_add_u32 s5, s5, s7 ; GFX1032GISEL-NEXT: s_mul_i32 s6, s6, s3 ; GFX1032GISEL-NEXT: s_add_u32 s7, s5, s4 -; GFX1032GISEL-NEXT: .LBB9_4: ; %endif +; GFX1032GISEL-NEXT: .LBB5_4: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2862,7 +1954,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 ; GFX1164DAGISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1164DAGISEL-NEXT: s_mov_b64 s[8:9], exec ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) @@ -2877,7 +1969,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164DAGISEL-NEXT: s_add_u32 s3, s10, s3 ; GFX1164DAGISEL-NEXT: s_mul_i32 s8, s2, s8 ; GFX1164DAGISEL-NEXT: s_add_u32 s9, s3, s9 -; GFX1164DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[6:7] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s8 @@ -2913,7 +2005,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164GISEL-NEXT: 
s_cbranch_execz .LBB5_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) @@ -2928,10 +2020,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_add_u32 s3, s10, s3 ; GFX1164GISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX1164GISEL-NEXT: s_add_u32 s7, s3, s7 -; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec @@ -2947,7 +2039,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_add_u32 s5, s8, s5 ; GFX1164GISEL-NEXT: s_mul_i32 s6, s4, s6 ; GFX1164GISEL-NEXT: s_add_u32 s7, s5, s7 -; GFX1164GISEL-NEXT: .LBB9_4: ; %endif +; GFX1164GISEL-NEXT: .LBB5_4: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2966,7 +2058,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 ; GFX1132DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1132DAGISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) @@ -2981,7 +2073,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132DAGISEL-NEXT: s_add_u32 s3, s9, s3 ; GFX1132DAGISEL-NEXT: s_mul_i32 s6, 
s2, s6 ; GFX1132DAGISEL-NEXT: s_add_u32 s7, s3, s7 -; GFX1132DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s2, s8 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 @@ -3016,7 +2108,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) @@ -3031,10 +2123,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_add_u32 s3, s9, s3 ; GFX1132GISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX1132GISEL-NEXT: s_add_u32 s7, s3, s7 -; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo @@ -3050,7 +2142,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_add_u32 s5, s7, s5 ; GFX1132GISEL-NEXT: s_mul_i32 s6, s4, s3 ; GFX1132GISEL-NEXT: s_add_u32 s7, s5, s8 -; GFX1132GISEL-NEXT: .LBB9_4: ; %endif +; GFX1132GISEL-NEXT: .LBB5_4: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 @@ -3068,7 +2160,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, 
i64 %in, i64 ; GFX12DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 ; GFX12DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX12DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX12DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX12DAGISEL-NEXT: ; %bb.1: ; %else ; GFX12DAGISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX12DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) @@ -3083,7 +2175,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX12DAGISEL-NEXT: s_add_co_u32 s3, s9, s3 ; GFX12DAGISEL-NEXT: s_mul_i32 s6, s2, s6 ; GFX12DAGISEL-NEXT: s_add_co_u32 s7, s3, s7 -; GFX12DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX12DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX12DAGISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12DAGISEL-NEXT: s_or_saveexec_b32 s2, s8 ; GFX12DAGISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll index 1f848d577d2a4..54c8e2e248f57 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll @@ -125,153 +125,19 @@ entry: ret void } -define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; 
GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; 
GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 123, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: poison_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 poison, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; 
GFX8DAGISEL-LABEL: divergent_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8DAGISEL-NEXT: s_max_u32 s4, s4, s6 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -285,13 +151,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_mov_b32 s4, 0 -; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8GISEL-NEXT: s_max_u32 s4, s4, s6 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -306,13 +172,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; 
GFX9DAGISEL-NEXT: s_max_u32 s4, s4, s6 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -324,13 +190,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9GISEL-NEXT: s_max_u32 s4, s4, s6 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -344,13 +210,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064DAGISEL-NEXT: s_max_u32 s4, s4, s6 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -362,13 +228,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 
0x24 ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064GISEL-NEXT: s_max_u32 s4, s4, s6 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -382,13 +248,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032DAGISEL-NEXT: s_max_u32 s2, s2, s5 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -400,13 +266,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032GISEL-NEXT: s_max_u32 s2, s2, s5 ; 
GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -421,14 +287,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164DAGISEL-NEXT: s_max_u32 s4, s4, s6 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -441,14 +307,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164GISEL-NEXT: s_max_u32 s4, s4, s6 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164GISEL-NEXT: 
v_mov_b32_e32 v1, 0 @@ -462,14 +328,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132DAGISEL-NEXT: s_max_u32 s2, s2, s5 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -482,14 +348,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) { ; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132GISEL-NEXT: s_max_u32 s2, s2, s5 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -517,20 +383,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; 
GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX8DAGISEL-NEXT: ; %bb.3: ; %if ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8DAGISEL-NEXT: s_max_u32 s6, s6, s8 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX8DAGISEL-NEXT: ; %bb.5: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -545,26 +411,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b32 s6, s2 -; GFX8GISEL-NEXT: .LBB4_2: ; %Flow +; GFX8GISEL-NEXT: .LBB2_2: ; %Flow ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_mov_b32 s6, 0 -; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; 
GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8GISEL-NEXT: s_max_u32 s6, s6, s8 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8GISEL-NEXT: .LBB4_5: ; %endif +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8GISEL-NEXT: .LBB2_5: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 @@ -588,20 +454,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX9DAGISEL-NEXT: ; %bb.3: ; %if ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9DAGISEL-NEXT: s_max_u32 s6, s6, s8 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX9DAGISEL-NEXT: ; %bb.5: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -615,26 +481,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; 
GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b32 s6, s2 -; GFX9GISEL-NEXT: .LBB4_2: ; %Flow +; GFX9GISEL-NEXT: .LBB2_2: ; %Flow ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9GISEL-NEXT: s_max_u32 s6, s6, s8 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9GISEL-NEXT: .LBB4_5: ; %endif +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9GISEL-NEXT: .LBB2_5: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -657,20 +523,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064DAGISEL-NEXT: s_max_u32 s6, s6, s8 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; 
GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1064DAGISEL-NEXT: ; %bb.5: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -684,26 +550,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064GISEL-NEXT: s_max_u32 s6, s6, s8 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064GISEL-NEXT: .LBB4_5: ; %endif +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064GISEL-NEXT: .LBB2_5: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -726,20 +592,20 @@ define amdgpu_kernel void @divergent_cfg(ptr 
addrspace(1) %out, i32 %in) { ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032DAGISEL-NEXT: s_max_u32 s1, s1, s6 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1032DAGISEL-NEXT: ; %bb.5: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -753,26 +619,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_mov_b32 s2, 
exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032GISEL-NEXT: s_max_u32 s0, s0, s6 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032GISEL-NEXT: .LBB4_5: ; %endif +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032GISEL-NEXT: .LBB2_5: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -797,21 +663,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164DAGISEL-NEXT: s_max_u32 s6, s6, s8 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1164DAGISEL-NEXT: ; %bb.5: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 
0 @@ -827,27 +693,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c ; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164GISEL-NEXT: s_max_u32 s6, s6, s8 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164GISEL-NEXT: .LBB4_5: ; %endif +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164GISEL-NEXT: .LBB2_5: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -872,21 +738,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1132DAGISEL-NEXT: ; %bb.3: ; 
%if ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_max_u32 s1, s1, s6 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1132DAGISEL-NEXT: ; %bb.5: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -902,27 +768,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: 
s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_max_u32 s0, s0, s6 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132GISEL-NEXT: .LBB4_5: ; %endif +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132GISEL-NEXT: .LBB2_5: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 @@ -1200,7 +1066,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1211,7 +1077,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX8DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1224,7 +1090,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1235,7 +1101,7 @@ define void 
@divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX8GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1248,7 +1114,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1259,7 +1125,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX9DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1272,7 +1138,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1283,7 +1149,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX9GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; 
GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1296,7 +1162,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1307,7 +1173,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX1064DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1319,7 +1185,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1330,7 +1196,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX1064GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; 
GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1342,7 +1208,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1032DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1353,7 +1219,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1365,7 +1231,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1376,7 +1242,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: 
v_mov_b32_e32 v2, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1388,7 +1254,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v4, s0 @@ -1400,7 +1266,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s8 ; GFX1164DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1412,7 +1278,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v4, s0 @@ -1424,7 +1290,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s8 ; GFX1164GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX1164GISEL-NEXT: ; 
%bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1436,7 +1302,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1132DAGISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 @@ -1447,7 +1313,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1458,7 +1324,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 @@ -1469,7 +1335,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; 
GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB6_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1510,19 +1376,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB7_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX8GISEL-NEXT: .LBB9_2: ; %Flow +; GFX8GISEL-NEXT: .LBB7_2: ; %Flow ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB7_4 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX8GISEL-NEXT: .LBB9_4: ; %endif +; GFX8GISEL-NEXT: .LBB7_4: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1559,19 +1425,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB7_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9GISEL-NEXT: .LBB9_2: ; %Flow +; GFX9GISEL-NEXT: .LBB7_2: ; %Flow ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9GISEL-NEXT: 
s_cbranch_execz .LBB7_4 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX9GISEL-NEXT: .LBB9_4: ; %endif +; GFX9GISEL-NEXT: .LBB7_4: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1608,19 +1474,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB7_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB7_2: ; %Flow ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB7_4 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1064GISEL-NEXT: .LBB9_4: ; %endif +; GFX1064GISEL-NEXT: .LBB7_4: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1657,19 +1523,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB7_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], 
s[2:3] -; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB7_2: ; %Flow ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB7_4 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1032GISEL-NEXT: .LBB9_4: ; %endif +; GFX1032GISEL-NEXT: .LBB7_4: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1710,19 +1576,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB7_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB7_2: ; %Flow ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB7_4 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1164GISEL-NEXT: .LBB9_4: ; %endif +; GFX1164GISEL-NEXT: .LBB7_4: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1761,19 +1627,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 
16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB7_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB7_2: ; %Flow ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB7_4 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1132GISEL-NEXT: .LBB9_4: ; %endif +; GFX1132GISEL-NEXT: .LBB7_4: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll index c2cfb8828c30c..502ef84449751 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll @@ -125,153 +125,19 @@ entry: ret void } -define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; 
GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; 
GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 123, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: poison_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 poison, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - define amdgpu_kernel void @divergent_value(ptr 
addrspace(1) %out) { ; GFX8DAGISEL-LABEL: divergent_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8DAGISEL-NEXT: s_min_u32 s4, s4, s6 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -285,13 +151,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_mov_b32 s4, -1 -; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX8GISEL-NEXT: s_min_u32 s4, s4, s6 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -306,13 +172,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9DAGISEL-NEXT: s_bitset0_b64 
s[2:3], s5 ; GFX9DAGISEL-NEXT: s_min_u32 s4, s4, s6 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -324,13 +190,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_mov_b32 s4, -1 -; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX9GISEL-NEXT: s_min_u32 s4, s4, s6 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -344,13 +210,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064DAGISEL-NEXT: s_min_u32 s4, s4, s6 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -362,13 +228,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; 
GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_mov_b32 s4, -1 -; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1064GISEL-NEXT: s_min_u32 s4, s4, s6 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -382,13 +248,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, -1 -; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032DAGISEL-NEXT: s_min_u32 s2, s2, s5 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -400,13 +266,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1032GISEL-NEXT: s_mov_b32 s2, -1 -; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 ; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1032GISEL-NEXT: s_min_u32 s2, s2, s5 ; GFX1032GISEL-NEXT: 
s_cmp_lg_u32 s3, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -421,14 +287,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s4, -1 -; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164DAGISEL-NEXT: s_min_u32 s4, s4, s6 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -441,14 +307,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s4, -1 -; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 ; GFX1164GISEL-NEXT: s_min_u32 s4, s4, s6 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 @@ -462,14 +328,14 @@ 
define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, -1 -; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132DAGISEL-NEXT: s_min_u32 s2, s2, s5 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -482,14 +348,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { ; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s2, -1 -; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 ; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 ; GFX1132GISEL-NEXT: s_min_u32 s2, s2, s5 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -517,20 +383,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; 
GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX8DAGISEL-NEXT: ; %bb.3: ; %if ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_mov_b32 s6, -1 -; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8DAGISEL-NEXT: s_min_u32 s6, s6, s8 ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX8DAGISEL-NEXT: ; %bb.5: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -545,26 +411,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b32 s6, s2 -; GFX8GISEL-NEXT: .LBB4_2: ; %Flow +; GFX8GISEL-NEXT: .LBB2_2: ; %Flow ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_mov_b32 s6, -1 -; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; 
GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX8GISEL-NEXT: s_min_u32 s6, s6, s8 ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8GISEL-NEXT: .LBB4_5: ; %endif +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8GISEL-NEXT: .LBB2_5: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 @@ -588,20 +454,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX9DAGISEL-NEXT: ; %bb.3: ; %if ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_mov_b32 s6, -1 -; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9DAGISEL-NEXT: s_min_u32 s6, s6, s8 ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX9DAGISEL-NEXT: ; %bb.5: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -615,26 +481,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; 
GFX9GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b32 s6, s2 -; GFX9GISEL-NEXT: .LBB4_2: ; %Flow +; GFX9GISEL-NEXT: .LBB2_2: ; %Flow ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_mov_b32 s6, -1 -; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX9GISEL-NEXT: s_min_u32 s6, s6, s8 ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9GISEL-NEXT: .LBB4_5: ; %endif +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9GISEL-NEXT: .LBB2_5: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -657,20 +523,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064DAGISEL-NEXT: s_mov_b32 s6, -1 -; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064DAGISEL-NEXT: s_min_u32 s6, s6, s8 ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; 
GFX1064DAGISEL-NEXT: ; %bb.5: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -684,26 +550,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1064GISEL-NEXT: s_mov_b32 s6, -1 -; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] ; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1064GISEL-NEXT: s_min_u32 s6, s6, s8 ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064GISEL-NEXT: .LBB4_5: ; %endif +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064GISEL-NEXT: .LBB2_5: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -726,20 +592,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; 
GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032DAGISEL-NEXT: s_mov_b32 s1, -1 -; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032DAGISEL-NEXT: s_min_u32 s1, s1, s6 ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1032DAGISEL-NEXT: ; %bb.5: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -753,26 +619,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1032GISEL-NEXT: 
s_mov_b32 s0, -1 -; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 ; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1032GISEL-NEXT: s_min_u32 s0, s0, s6 ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032GISEL-NEXT: .LBB4_5: ; %endif +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032GISEL-NEXT: .LBB2_5: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -797,21 +663,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 ; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: s_mov_b32 s6, -1 -; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164DAGISEL-NEXT: s_min_u32 s6, s6, s8 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1164DAGISEL-NEXT: ; %bb.5: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -827,27 +693,27 @@ 
define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c ; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b32 s6, s2 -; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164GISEL-NEXT: s_mov_b32 s6, -1 -; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 ; GFX1164GISEL-NEXT: s_min_u32 s6, s6, s8 ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164GISEL-NEXT: .LBB4_5: ; %endif +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164GISEL-NEXT: .LBB2_5: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -872,21 +738,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6 ; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if ; 
GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132DAGISEL-NEXT: s_mov_b32 s1, -1 -; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_min_u32 s1, s1, s6 ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 ; GFX1132DAGISEL-NEXT: ; %bb.5: ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif +; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif ; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 @@ -902,27 +768,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b32 s0, s0 -; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo ; GFX1132GISEL-NEXT: s_mov_b32 s0, -1 -; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: s_delay_alu 
instid0(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_min_u32 s0, s0, s6 ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132GISEL-NEXT: .LBB4_5: ; %endif +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132GISEL-NEXT: .LBB2_5: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 @@ -1054,153 +920,13 @@ entry: ret void } -define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b 
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: const_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10DAGISEL-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: const_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX10GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX10GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b64 v1, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: const_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: const_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0x7b -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: 
global_store_b64 v1, v[0:1], s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: const_value_i64: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX1132GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 123, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: poison_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: poison_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: poison_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: poison_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_endpgm -; -; GFX10DAGISEL-LABEL: poison_value_i64: -; GFX10DAGISEL: ; %bb.0: ; %entry -; GFX10DAGISEL-NEXT: s_endpgm -; -; GFX10GISEL-LABEL: poison_value_i64: -; GFX10GISEL: ; %bb.0: ; %entry -; GFX10GISEL-NEXT: s_endpgm -; -; GFX11DAGISEL-LABEL: poison_value_i64: -; GFX11DAGISEL: ; %bb.0: ; %entry -; GFX11DAGISEL-NEXT: s_endpgm -; -; GFX11GISEL-LABEL: poison_value_i64: -; GFX11GISEL: ; %bb.0: ; %entry -; GFX11GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 poison, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-LABEL: divergent_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; 
GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1211,7 +937,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX8DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1224,7 +950,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX8GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1235,7 +961,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX8GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1248,7 +974,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX9DAGISEL-NEXT: 
v_mov_b32_e32 v5, s5 @@ -1259,7 +985,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX9DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1272,7 +998,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX9GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1283,7 +1009,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX9GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1296,7 +1022,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1307,7 +1033,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: 
s_bitset0_b64 s[6:7], s12 ; GFX1064DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1319,7 +1045,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1330,7 +1056,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s12 ; GFX1064GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1342,7 +1068,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1032DAGISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1353,7 +1079,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], 
s[4:5] ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1365,7 +1091,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[4:5], -1 ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v4, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1376,7 +1102,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1388,7 +1114,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], -1 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v4, s0 @@ -1400,7 +1126,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s8 ; GFX1164DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1164DAGISEL-NEXT: 
s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1412,7 +1138,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], -1 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s8, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v4, s0 @@ -1424,7 +1150,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s8 ; GFX1164GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -1436,7 +1162,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1132DAGISEL-NEXT: s_mov_b64 s[0:1], -1 ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 @@ -1447,7 +1173,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: 
s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1458,7 +1184,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[0:1], -1 ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 @@ -1469,7 +1195,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -1510,19 +1236,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX8GISEL-NEXT: .LBB9_2: ; %Flow +; GFX8GISEL-NEXT: .LBB5_2: ; %Flow ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], 
s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX8GISEL-NEXT: .LBB9_4: ; %endif +; GFX8GISEL-NEXT: .LBB5_4: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -1559,19 +1285,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9GISEL-NEXT: .LBB9_2: ; %Flow +; GFX9GISEL-NEXT: .LBB5_2: ; %Flow ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX9GISEL-NEXT: .LBB9_4: ; %endif +; GFX9GISEL-NEXT: .LBB5_4: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1608,19 +1334,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 
s[6:7], s[2:3] -; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1064GISEL-NEXT: .LBB9_4: ; %endif +; GFX1064GISEL-NEXT: .LBB5_4: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1657,19 +1383,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[6:7] -; GFX1032GISEL-NEXT: .LBB9_4: ; %endif +; GFX1032GISEL-NEXT: .LBB5_4: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1710,19 +1436,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: 
v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1164GISEL-NEXT: .LBB9_4: ; %endif +; GFX1164GISEL-NEXT: .LBB5_4: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -1761,19 +1487,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1132GISEL-NEXT: .LBB9_4: ; %endif +; GFX1132GISEL-NEXT: .LBB5_4: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; 
GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 @@ -1797,3 +1523,6 @@ endif: store i64 %combine, ptr addrspace(1) %out ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX11DAGISEL: {{.*}} +; GFX11GISEL: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll index be432c00de728..d5f1750c268ab 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll @@ -200,1494 +200,773 @@ entry: ret void } -define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value: +define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { +; GFX8DAGISEL-LABEL: divergent_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX8DAGISEL-NEXT: s_mulk_i32 s2, 0x7b +; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX8DAGISEL-NEXT: s_xor_b32 s4, s4, s6 +; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8DAGISEL-NEXT: s_endpgm ; -; GFX8GISEL-LABEL: const_value: +; GFX8GISEL-LABEL: divergent_value: ; GFX8GISEL: ; %bb.0: ; %entry ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX8GISEL-NEXT: s_mov_b64 
s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX8GISEL-NEXT: s_mulk_i32 s2, 0x7b +; GFX8GISEL-NEXT: s_mov_b32 s4, 0 +; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX8GISEL-NEXT: s_xor_b32 s4, s4, s6 +; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8GISEL-NEXT: s_endpgm ; -; GFX9DAGISEL-LABEL: const_value: +; GFX9DAGISEL-LABEL: divergent_value: ; GFX9DAGISEL: ; %bb.0: ; %entry ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX9DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX9DAGISEL-NEXT: s_xor_b32 s4, s4, s6 +; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX9DAGISEL-NEXT: ; %bb.2: +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; -; GFX9GISEL-LABEL: const_value: +; GFX9GISEL-LABEL: divergent_value: ; GFX9GISEL: ; %bb.0: ; %entry ; GFX9GISEL-NEXT: 
s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX9GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9GISEL-NEXT: s_mov_b32 s4, 0 +; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX9GISEL-NEXT: s_xor_b32 s4, s4, s6 +; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX9GISEL-NEXT: ; %bb.2: +; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: const_value: +; GFX1064DAGISEL-LABEL: divergent_value: ; GFX1064DAGISEL: ; %bb.0: ; %entry ; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1064DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 +; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1064DAGISEL-NEXT: s_xor_b32 s4, s4, s6 +; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1064DAGISEL-NEXT: ; %bb.2: +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1064DAGISEL-NEXT: s_endpgm ; -; 
GFX1064GISEL-LABEL: const_value: +; GFX1064GISEL-LABEL: divergent_value: ; GFX1064GISEL: ; %bb.0: ; %entry ; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 +; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] +; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1064GISEL-NEXT: s_xor_b32 s4, s4, s6 +; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1064GISEL-NEXT: ; %bb.2: +; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1064GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1064GISEL-NEXT: s_endpgm ; -; GFX1032DAGISEL-LABEL: const_value: +; GFX1032DAGISEL-LABEL: divergent_value: ; GFX1032DAGISEL: ; %bb.0: ; %entry ; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1032DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0 +; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 +; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1032DAGISEL-NEXT: s_xor_b32 s2, s2, s5 +; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1032DAGISEL-NEXT: ; %bb.2: +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 
v0, s2 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1032DAGISEL-NEXT: s_endpgm ; -; GFX1032GISEL-LABEL: const_value: +; GFX1032GISEL-LABEL: divergent_value: ; GFX1032GISEL: ; %bb.0: ; %entry ; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1032GISEL-NEXT: s_mulk_i32 s2, 0x7b +; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1032GISEL-NEXT: s_mov_b32 s2, 0 +; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 +; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1032GISEL-NEXT: s_xor_b32 s2, s2, s5 +; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1032GISEL-NEXT: s_endpgm ; -; GFX1164DAGISEL-LABEL: const_value: +; GFX1164DAGISEL-LABEL: divergent_value: ; GFX1164DAGISEL: ; %bb.0: ; %entry ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1164DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0 +; 
GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1164DAGISEL-NEXT: s_xor_b32 s4, s4, s6 +; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1164DAGISEL-NEXT: ; %bb.2: +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1164DAGISEL-NEXT: s_endpgm ; -; GFX1164GISEL-LABEL: const_value: +; GFX1164GISEL-LABEL: divergent_value: ; GFX1164GISEL: ; %bb.0: ; %entry ; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164GISEL-NEXT: s_mov_b32 s4, 0 +; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] +; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 +; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 +; GFX1164GISEL-NEXT: s_xor_b32 s4, s4, s6 +; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1164GISEL-NEXT: ; %bb.2: +; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1164GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1164GISEL-NEXT: s_endpgm ; -; 
GFX1132DAGISEL-LABEL: const_value: +; GFX1132DAGISEL-LABEL: divergent_value: ; GFX1132DAGISEL: ; %bb.0: ; %entry ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0 +; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 +; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1132DAGISEL-NEXT: s_xor_b32 s2, s2, s5 +; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1132DAGISEL-NEXT: ; %bb.2: +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1132DAGISEL-NEXT: s_endpgm ; -; GFX1132GISEL-LABEL: const_value: +; GFX1132GISEL-LABEL: divergent_value: ; GFX1132GISEL: ; %bb.0: ; %entry ; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1132GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1132GISEL-NEXT: s_delay_alu 
instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo +; GFX1132GISEL-NEXT: s_mov_b32 s2, 0 +; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 +; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 +; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 +; GFX1132GISEL-NEXT: s_xor_b32 s2, s2, s5 +; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1 +; GFX1132GISEL-NEXT: ; %bb.2: +; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1132GISEL-NEXT: s_endpgm entry: - %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 123, i32 1) + %id.x = call i32 @llvm.amdgcn.workitem.id.x() + %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %id.x, i32 1) store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: poison_value: +define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { +; GFX8DAGISEL-LABEL: divergent_cfg: ; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 +; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX8DAGISEL-NEXT: ; %bb.1: ; %else +; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX8DAGISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; 
GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 +; GFX8DAGISEL-NEXT: s_mul_i32 s2, s6, s2 +; GFX8DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX8DAGISEL-NEXT: ; %bb.3: ; %if +; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX8DAGISEL-NEXT: s_xor_b32 s6, s6, s8 +; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8DAGISEL-NEXT: ; %bb.5: +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX8DAGISEL-NEXT: flat_store_dword v[2:3], v1 ; GFX8DAGISEL-NEXT: s_endpgm ; -; GFX8GISEL-LABEL: poison_value: +; GFX8GISEL-LABEL: divergent_cfg: ; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 +; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX8GISEL-NEXT: ; %bb.1: ; %else +; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX8GISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 +; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) +; 
GFX8GISEL-NEXT: s_mul_i32 s6, s6, s2 +; GFX8GISEL-NEXT: .LBB2_2: ; %Flow +; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX8GISEL-NEXT: ; %bb.3: ; %if +; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX8GISEL-NEXT: s_mov_b32 s6, 0 +; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX8GISEL-NEXT: s_xor_b32 s6, s6, s8 +; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX8GISEL-NEXT: .LBB2_5: ; %endif +; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s2, s0, s2 ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2 ; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 ; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8GISEL-NEXT: s_endpgm ; -; GFX9DAGISEL-LABEL: poison_value: +; GFX9DAGISEL-LABEL: divergent_cfg: ; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 +; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX9DAGISEL-NEXT: ; %bb.1: ; %else +; GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX9DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s0, s2 +; GFX9DAGISEL-NEXT: s_mul_i32 s2, s6, s2 +; GFX9DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] ; 
GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX9DAGISEL-NEXT: ; %bb.3: ; %if +; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX9DAGISEL-NEXT: s_xor_b32 s6, s6, s8 +; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9DAGISEL-NEXT: ; %bb.5: +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; -; GFX9GISEL-LABEL: poison_value: +; GFX9GISEL-LABEL: divergent_cfg: ; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 +; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX9GISEL-NEXT: ; %bb.1: ; %else +; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX9GISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 +; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9GISEL-NEXT: s_mul_i32 s6, s6, s2 +; GFX9GISEL-NEXT: .LBB2_2: ; %Flow +; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX9GISEL-NEXT: ; %bb.3: ; %if +; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX9GISEL-NEXT: s_mov_b32 s6, 0 +; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 
+; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX9GISEL-NEXT: s_xor_b32 s6, s6, s8 +; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX9GISEL-NEXT: .LBB2_5: ; %endif +; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: poison_value: +; GFX1064DAGISEL-LABEL: divergent_cfg: ; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 +; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX1064DAGISEL-NEXT: s_and_b32 s2, s2, 1 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s0, s2 +; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s6, s2 +; GFX1064DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: 
Depth=1 +; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1064DAGISEL-NEXT: s_xor_b32 s6, s6, s8 +; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064DAGISEL-NEXT: ; %bb.5: +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX1064DAGISEL-NEXT: s_endpgm ; -; GFX1064GISEL-LABEL: poison_value: +; GFX1064GISEL-LABEL: divergent_cfg: ; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 +; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc +; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1064GISEL-NEXT: ; %bb.1: ; %else +; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c ; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] ; GFX1064GISEL-NEXT: s_and_b32 s2, s2, 1 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s2 +; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1064GISEL-NEXT: ; %bb.3: ; %if +; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064GISEL-NEXT: s_mov_b32 s6, 0 +; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] +; 
GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1064GISEL-NEXT: s_xor_b32 s6, s6, s8 +; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1064GISEL-NEXT: .LBB2_5: ; %endif +; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 +; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX1064GISEL-NEXT: s_endpgm ; -; GFX1032DAGISEL-LABEL: poison_value: +; GFX1032DAGISEL-LABEL: divergent_cfg: ; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0 +; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1 +; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo +; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c ; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 ; GFX1032DAGISEL-NEXT: s_and_b32 s2, s2, 1 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1032DAGISEL-NEXT: s_mul_i32 s1, s1, s2 +; GFX1032DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0 +; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 +; 
GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1032DAGISEL-NEXT: s_xor_b32 s1, s1, s6 +; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032DAGISEL-NEXT: ; %bb.5: +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX1032DAGISEL-NEXT: s_endpgm ; -; GFX1032GISEL-LABEL: poison_value: +; GFX1032GISEL-LABEL: divergent_cfg: ; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX1032GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v0 +; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 +; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo +; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1032GISEL-NEXT: ; %bb.1: ; %else +; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 ; GFX1032GISEL-NEXT: s_and_b32 s2, s2, 1 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX1032GISEL-NEXT: s_mul_i32 s0, s0, s2 +; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1032GISEL-NEXT: ; %bb.3: ; %if +; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032GISEL-NEXT: s_mov_b32 s0, 0 +; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 +; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 
+; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1032GISEL-NEXT: s_xor_b32 s0, s0, s6 +; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1032GISEL-NEXT: .LBB2_5: ; %endif +; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3] ; GFX1032GISEL-NEXT: s_endpgm ; -; GFX1164DAGISEL-LABEL: poison_value: +; GFX1164DAGISEL-LABEL: divergent_cfg: ; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec +; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2 +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 +; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0 +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: s_and_b32 s2, s2, 1 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s0, s2 +; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s6, s2 +; GFX1164DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: poison_value: -; 
GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: poison_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: poison_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 poison, i32 1) - store i32 %result, ptr addrspace(1) %out - ret 
void -} - -define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: divergent_value: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX8DAGISEL-NEXT: s_xor_b32 s4, s4, s6 -; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX8DAGISEL-NEXT: ; %bb.2: -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: divergent_value: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_mov_b32 s4, 0 -; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX8GISEL-NEXT: s_xor_b32 s4, s4, s6 -; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX8GISEL-NEXT: ; %bb.2: -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: divergent_value: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop 
Header: Depth=1 -; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX9DAGISEL-NEXT: s_xor_b32 s4, s4, s6 -; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX9DAGISEL-NEXT: ; %bb.2: -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: divergent_value: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_mov_b32 s4, 0 -; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX9GISEL-NEXT: s_xor_b32 s4, s4, s6 -; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX9GISEL-NEXT: ; %bb.2: -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: divergent_value: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1064DAGISEL-NEXT: s_xor_b32 s4, s4, s6 -; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1064DAGISEL-NEXT: ; %bb.2: -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; 
GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: divergent_value: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] -; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1064GISEL-NEXT: s_xor_b32 s4, s4, s6 -; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1064GISEL-NEXT: ; %bb.2: -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: divergent_value: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 -; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1032DAGISEL-NEXT: s_xor_b32 s2, s2, s5 -; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1032DAGISEL-NEXT: ; %bb.2: -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: divergent_value: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1032GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop 
Header: Depth=1 -; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 -; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1032GISEL-NEXT: s_xor_b32 s2, s2, s5 -; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1032GISEL-NEXT: ; %bb.2: -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: divergent_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if ; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1164DAGISEL-NEXT: s_xor_b32 s4, s4, s6 +; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0 +; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] +; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1164DAGISEL-NEXT: s_xor_b32 s6, s6, s8 ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1164DAGISEL-NEXT: ; %bb.2: -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; 
GFX1164GISEL-LABEL: divergent_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: s_mov_b32 s4, 0 -; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 -; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 -; GFX1164GISEL-NEXT: s_xor_b32 s4, s4, s6 -; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1164GISEL-NEXT: ; %bb.2: -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: divergent_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1132DAGISEL-NEXT: s_xor_b32 s2, s2, s5 -; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1132DAGISEL-NEXT: ; %bb.2: -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: divergent_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; 
GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo -; GFX1132GISEL-NEXT: s_mov_b32 s2, 0 -; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 -; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 -; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 -; GFX1132GISEL-NEXT: s_xor_b32 s2, s2, s5 -; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1 -; GFX1132GISEL-NEXT: ; %bb.2: -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %id.x = call i32 @llvm.amdgcn.workitem.id.x() - %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %id.x, i32 1) - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { -; GFX8DAGISEL-LABEL: divergent_cfg: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 -; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX8DAGISEL-NEXT: ; %bb.1: ; %else -; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s6, s2 -; GFX8DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX8DAGISEL-NEXT: ; %bb.3: ; %if -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec 
-; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX8DAGISEL-NEXT: s_xor_b32 s6, s6, s8 -; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX8DAGISEL-NEXT: ; %bb.5: -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dword v[2:3], v1 -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: divergent_cfg: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 -; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX8GISEL-NEXT: ; %bb.1: ; %else -; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s6, s6, s2 -; GFX8GISEL-NEXT: .LBB4_2: ; %Flow -; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX8GISEL-NEXT: ; %bb.3: ; %if -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_mov_b32 s6, 0 -; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX8GISEL-NEXT: s_xor_b32 s6, s6, s8 -; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; 
GFX8GISEL-NEXT: .LBB4_5: ; %endif -; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: divergent_cfg: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 -; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX9DAGISEL-NEXT: ; %bb.1: ; %else -; GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s6, s2 -; GFX9DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX9DAGISEL-NEXT: ; %bb.3: ; %if -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX9DAGISEL-NEXT: s_xor_b32 s6, s6, s8 -; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9DAGISEL-NEXT: ; %bb.5: -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: 
s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: divergent_cfg: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 -; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX9GISEL-NEXT: ; %bb.1: ; %else -; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s6, s6, s2 -; GFX9GISEL-NEXT: .LBB4_2: ; %Flow -; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX9GISEL-NEXT: ; %bb.3: ; %if -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_mov_b32 s6, 0 -; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX9GISEL-NEXT: s_xor_b32 s6, s6, s8 -; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX9GISEL-NEXT: .LBB4_5: ; %endif -; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: divergent_cfg: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 -; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; 
GFX1064DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s6, s2 -; GFX1064DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1064DAGISEL-NEXT: s_xor_b32 s6, s6, s8 -; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064DAGISEL-NEXT: ; %bb.5: -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: divergent_cfg: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 -; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc -; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1064GISEL-NEXT: ; %bb.1: ; %else -; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: ; 
implicit-def: $vgpr0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s2 -; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1064GISEL-NEXT: ; %bb.3: ; %if -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] -; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1064GISEL-NEXT: s_xor_b32 s6, s6, s8 -; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1064GISEL-NEXT: .LBB4_5: ; %endif -; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: divergent_cfg: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0 -; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1 -; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo -; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_i32 s1, s1, s2 -; GFX1032DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 
v1, s1 -; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 -; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1032DAGISEL-NEXT: s_xor_b32 s1, s1, s6 -; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032DAGISEL-NEXT: ; %bb.5: -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: divergent_cfg: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v0 -; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 -; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo -; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1032GISEL-NEXT: ; %bb.1: ; %else -; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_i32 s0, s0, s2 -; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1032GISEL-NEXT: ; %bb.3: ; %if -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; 
GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 -; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1032GISEL-NEXT: s_xor_b32 s0, s0, s6 -; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1032GISEL-NEXT: .LBB4_5: ; %endif -; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: divergent_cfg: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec -; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 -; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s6, s2 -; GFX1164DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1164DAGISEL-NEXT: 
s_ctz_i32_b64 s7, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1164DAGISEL-NEXT: s_xor_b32 s6, s6, s8 -; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164DAGISEL-NEXT: ; %bb.5: -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 -; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: divergent_cfg: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec -; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6 -; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 -; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1164GISEL-NEXT: ; %bb.1: ; %else -; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_i32 s6, s6, s2 -; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1164GISEL-NEXT: ; %bb.3: ; %if -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: s_mov_b32 s6, 0 -; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] -; GFX1164GISEL-NEXT: 
s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 -; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 -; GFX1164GISEL-NEXT: s_xor_b32 s6, s6, s8 -; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1164GISEL-NEXT: .LBB4_5: ; %endif -; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: divergent_cfg: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo -; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 -; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else -; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_i32 s1, s1, s2 -; GFX1132DAGISEL-NEXT: .LBB4_2: ; %Flow -; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 -; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0 -; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 -; 
GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 -; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1132DAGISEL-NEXT: s_xor_b32 s1, s1, s6 -; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132DAGISEL-NEXT: ; %bb.5: -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif -; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: divergent_cfg: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo -; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0 -; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 -; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 -; GFX1132GISEL-NEXT: ; %bb.1: ; %else -; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_and_b32 s2, s2, 1 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_i32 s0, s0, s2 -; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow -; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5 -; GFX1132GISEL-NEXT: ; %bb.3: ; %if -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 -; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 -; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: 
v_readlane_b32 s6, v0, s3 -; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 -; GFX1132GISEL-NEXT: s_xor_b32 s0, s0, s6 -; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 -; GFX1132GISEL-NEXT: .LBB4_5: ; %endif -; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %d_cmp = icmp ult i32 %tid, 16 - br i1 %d_cmp, label %if, label %else - -if: - %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %tid, i32 1) - br label %endif - -else: - %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %in, i32 1) - br label %endif - -endif: - %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else] - store i32 %combine, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: uniform_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX8DAGISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: s_mul_i32 s1, s3, s4 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: s_mul_i32 s0, s2, s4 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: uniform_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX8GISEL-NEXT: s_and_b32 s4, s4, 1 -; 
GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: uniform_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX9DAGISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: uniform_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX9GISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: uniform_value_i64: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064DAGISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1064DAGISEL-NEXT: 
s_mul_i32 s3, s3, s4 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: uniform_value_i64: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1064GISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: uniform_value_i64: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1032DAGISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: uniform_value_i64: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1032GISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 
v1, s3 -; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: uniform_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm -; -; GFX1164GISEL-LABEL: uniform_value_i64: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[4:5], exec -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1164GISEL-NEXT: s_endpgm -; -; GFX1132DAGISEL-LABEL: uniform_value_i64: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1132DAGISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s4 -; 
GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 -; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm -; -; GFX1132GISEL-LABEL: uniform_value_i64: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s4, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_and_b32 s4, s4, 1 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_i32 s2, s2, s4 -; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s4 -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] -; GFX1132GISEL-NEXT: s_endpgm -entry: - %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 %in, i32 1) - store i64 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) { -; GFX8DAGISEL-LABEL: const_value_i64: -; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GFX8DAGISEL-NEXT: s_endpgm -; -; GFX8GISEL-LABEL: const_value_i64: -; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: s_and_b32 s3, s2, 
1 -; GFX8GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX8GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 -; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] -; GFX8GISEL-NEXT: s_endpgm -; -; GFX9DAGISEL-LABEL: const_value_i64: -; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9DAGISEL-NEXT: s_endpgm -; -; GFX9GISEL-LABEL: const_value_i64: -; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX9GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX9GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX9GISEL-NEXT: s_endpgm -; -; GFX1064DAGISEL-LABEL: const_value_i64: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX1064DAGISEL-NEXT: 
v_mov_b32_e32 v0, s2 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: const_value_i64: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX1064GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: const_value_i64: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: const_value_i64: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX1032GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3 -; GFX1032GISEL-NEXT: s_waitcnt 
lgkmcnt(0) -; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: const_value_i64: -; GFX1164DAGISEL: ; %bb.0: ; %entry +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164DAGISEL-NEXT: ; %bb.5: +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 +; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX1164DAGISEL-NEXT: s_endpgm ; -; GFX1164GISEL-LABEL: const_value_i64: +; GFX1164GISEL-LABEL: divergent_cfg: ; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec +; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6 +; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 +; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1164GISEL-NEXT: ; %bb.1: ; %else +; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | 
instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX1164GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1164GISEL-NEXT: s_and_b32 s2, s2, 1 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1164GISEL-NEXT: s_mul_i32 s6, s6, s2 +; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1164GISEL-NEXT: ; %bb.3: ; %if +; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164GISEL-NEXT: s_mov_b32 s6, 0 +; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] +; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 +; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 +; GFX1164GISEL-NEXT: s_xor_b32 s6, s6, s8 +; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1164GISEL-NEXT: .LBB2_5: ; %endif +; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 +; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1164GISEL-NEXT: s_endpgm ; -; GFX1132DAGISEL-LABEL: const_value_i64: +; GFX1132DAGISEL-LABEL: divergent_cfg: ; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo +; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1 +; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 +; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 +; 
GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else +; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c ; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX1132DAGISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1132DAGISEL-NEXT: s_mul_i32 s1, s1, s2 +; GFX1132DAGISEL-NEXT: .LBB2_2: ; %Flow +; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6 +; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if +; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0 +; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 +; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1132DAGISEL-NEXT: s_xor_b32 s1, s1, s6 +; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132DAGISEL-NEXT: ; %bb.5: +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif +; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX1132DAGISEL-NEXT: s_endpgm ; -; 
GFX1132GISEL-LABEL: const_value_i64: +; GFX1132GISEL-LABEL: divergent_cfg: ; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo +; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0 +; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 +; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2 +; GFX1132GISEL-NEXT: ; %bb.1: ; %else +; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 +; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_and_b32 s3, s2, 1 -; GFX1132GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, 0 +; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 +; GFX1132GISEL-NEXT: s_and_b32 s2, s2, 1 +; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1132GISEL-NEXT: s_mul_i32 s0, s0, s2 +; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow +; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5 +; GFX1132GISEL-NEXT: ; %bb.3: ; %if +; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 +; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 +; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 +; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 +; GFX1132GISEL-NEXT: s_xor_b32 s0, s0, s6 +; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4 +; GFX1132GISEL-NEXT: .LBB2_5: ; %endif +; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX1132GISEL-NEXT: s_load_b64 
s[2:3], s[4:5], 0x24 +; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3] ; GFX1132GISEL-NEXT: s_endpgm entry: - %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 123, i32 1) - store i64 %result, ptr addrspace(1) %out + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %d_cmp = icmp ult i32 %tid, 16 + br i1 %d_cmp, label %if, label %else + +if: + %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %tid, i32 1) + br label %endif + +else: + %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %in, i32 1) + br label %endif + +endif: + %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else] + store i32 %combine, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) { -; GFX8DAGISEL-LABEL: poison_value_i64: +define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) { +; GFX8DAGISEL-LABEL: uniform_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX8DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX8DAGISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX8DAGISEL-NEXT: s_mul_i32 s3, s1, s3 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3 +; GFX8DAGISEL-NEXT: s_mul_i32 s1, s3, s4 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX8DAGISEL-NEXT: s_mul_i32 s0, s2, s4 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 
v2, s0 ; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; GFX8DAGISEL-NEXT: s_endpgm ; -; GFX8GISEL-LABEL: poison_value_i64: +; GFX8GISEL-LABEL: uniform_value_i64: ; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX8GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX8GISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX8GISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX8GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3 @@ -1695,159 +974,159 @@ define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) { ; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8GISEL-NEXT: s_endpgm ; -; GFX9DAGISEL-LABEL: poison_value_i64: +; GFX9DAGISEL-LABEL: uniform_value_i64: ; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX9DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX9DAGISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX9DAGISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX9DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; -; GFX9GISEL-LABEL: 
poison_value_i64: +; GFX9GISEL-LABEL: uniform_value_i64: ; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX9GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec +; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX9GISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX9GISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX9GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: poison_value_i64: +; GFX1064DAGISEL-LABEL: uniform_value_i64: ; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX1064DAGISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1064DAGISEL-NEXT: s_endpgm ; -; GFX1064GISEL-LABEL: poison_value_i64: +; GFX1064GISEL-LABEL: uniform_value_i64: ; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], 
s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1064GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] +; GFX1064GISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX1064GISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX1064GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1064GISEL-NEXT: s_endpgm ; -; GFX1032DAGISEL-LABEL: poison_value_i64: +; GFX1032DAGISEL-LABEL: uniform_value_i64: ; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 +; GFX1032DAGISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1032DAGISEL-NEXT: s_endpgm ; -; GFX1032GISEL-LABEL: poison_value_i64: +; GFX1032GISEL-LABEL: uniform_value_i64: ; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1032GISEL-NEXT: s_load_dwordx4 
s[0:3], s[4:5], 0x24 +; GFX1032GISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 +; GFX1032GISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX1032GISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX1032GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1032GISEL-NEXT: s_endpgm ; -; GFX1164DAGISEL-LABEL: poison_value_i64: +; GFX1164DAGISEL-LABEL: uniform_value_i64: ; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1164DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX1164DAGISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1164DAGISEL-NEXT: s_endpgm ; -; GFX1164GISEL-LABEL: poison_value_i64: +; GFX1164GISEL-LABEL: uniform_value_i64: ; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1164GISEL-NEXT: s_mov_b64 
s[4:5], exec ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] +; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX1164GISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX1164GISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX1164GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1164GISEL-NEXT: s_endpgm ; -; GFX1132DAGISEL-LABEL: poison_value_i64: +; GFX1132DAGISEL-LABEL: uniform_value_i64: ; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1132DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 +; GFX1132DAGISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3 ; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1132DAGISEL-NEXT: s_endpgm ; -; GFX1132GISEL-LABEL: poison_value_i64: +; GFX1132GISEL-LABEL: uniform_value_i64: ; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo +; GFX1132GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; 
GFX1132GISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 +; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s4, s4 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_and_b32 s3, s2, 1 +; GFX1132GISEL-NEXT: s_and_b32 s4, s4, 1 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_i32 s2, s0, s3 -; GFX1132GISEL-NEXT: s_mul_i32 s3, s1, s3 +; GFX1132GISEL-NEXT: s_mul_i32 s2, s2, s4 +; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s4 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1132GISEL-NEXT: s_endpgm entry: - %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 poison, i32 1) + %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 %in, i32 1) store i64 %result, ptr addrspace(1) %out ret void } @@ -1858,14 +1137,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX8DAGISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8DAGISEL-NEXT: ; %bb.2: ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1878,14 +1157,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], 
exec -; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX8GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX8GISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX8GISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX8GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX8GISEL-NEXT: ; %bb.2: ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1898,14 +1177,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX9DAGISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9DAGISEL-NEXT: ; %bb.2: ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1918,14 +1197,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX9GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX9GISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX9GISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX9GISEL-NEXT: 
s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX9GISEL-NEXT: ; %bb.2: ; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1938,14 +1217,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX1064DAGISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064DAGISEL-NEXT: ; %bb.2: ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1957,14 +1236,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec -; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1064GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] ; GFX1064GISEL-NEXT: v_readlane_b32 s8, v2, s10 ; GFX1064GISEL-NEXT: v_readlane_b32 s9, v3, s10 ; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s10 ; GFX1064GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 -; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1064GISEL-NEXT: ; %bb.2: ; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4 ; 
GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1976,14 +1255,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1032DAGISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032DAGISEL-NEXT: v_readlane_b32 s9, v3, s7 ; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032DAGISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032DAGISEL-NEXT: ; %bb.2: ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -1995,14 +1274,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mov_b64 s[4:5], 0 ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo -; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6 ; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7 ; GFX1032GISEL-NEXT: v_readlane_b32 s9, v3, s7 ; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7 ; GFX1032GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9] ; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0 -; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1032GISEL-NEXT: ; %bb.2: ; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5 @@ -2014,7 +1293,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX1164DAGISEL-NEXT: s_mov_b64 
s[2:3], exec -; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s6, s[2:3] ; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s6 @@ -2022,7 +1301,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s6 ; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] ; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164DAGISEL-NEXT: ; %bb.2: ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -2034,7 +1313,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1164GISEL-NEXT: s_ctz_i32_b64 s6, s[2:3] ; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s6 @@ -2042,7 +1321,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s6 ; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] ; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 -; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1164GISEL-NEXT: ; %bb.2: ; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0 @@ -2054,7 +1333,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1132DAGISEL-NEXT: s_mov_b64 s[0:1], 0 ; 
GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -2062,7 +1341,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132DAGISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] ; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132DAGISEL-NEXT: ; %bb.2: ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -2073,7 +1352,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mov_b64 s[0:1], 0 ; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 ; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3 @@ -2081,7 +1360,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) { ; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 ; GFX1132GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] ; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1 +; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1 ; GFX1132GISEL-NEXT: ; %bb.2: ; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off @@ -2101,7 +1380,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; 
GFX8DAGISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8DAGISEL-NEXT: ; %bb.1: ; %else ; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2109,7 +1388,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: s_mul_i32 s6, s2, s7 ; GFX8DAGISEL-NEXT: s_mul_i32 s7, s3, s7 -; GFX8DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX8DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9] ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -2137,7 +1416,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX8GISEL-NEXT: ; %bb.1: ; %else ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2145,10 +1424,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_mul_i32 s6, s2, s7 ; GFX8GISEL-NEXT: s_mul_i32 s7, s3, s7 -; GFX8GISEL-NEXT: .LBB9_2: ; %Flow +; GFX8GISEL-NEXT: .LBB5_2: ; %Flow ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX8GISEL-NEXT: ; %bb.3: ; %if ; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec @@ -2157,7 +1436,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX8GISEL-NEXT: s_waitcnt 
lgkmcnt(0) ; GFX8GISEL-NEXT: s_mul_i32 s6, s4, s7 ; GFX8GISEL-NEXT: s_mul_i32 s7, s5, s7 -; GFX8GISEL-NEXT: .LBB9_4: ; %endif +; GFX8GISEL-NEXT: .LBB5_4: ; %endif ; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 @@ -2174,7 +1453,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9DAGISEL-NEXT: ; %bb.1: ; %else ; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] @@ -2182,7 +1461,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: s_mul_i32 s4, s2, s5 ; GFX9DAGISEL-NEXT: s_mul_i32 s5, s3, s5 -; GFX9DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX9DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9] ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 @@ -2209,7 +1488,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX9GISEL-NEXT: ; %bb.1: ; %else ; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2217,10 +1496,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mul_i32 s6, s2, s7 ; GFX9GISEL-NEXT: s_mul_i32 s7, s3, s7 -; GFX9GISEL-NEXT: .LBB9_2: ; %Flow +; GFX9GISEL-NEXT: .LBB5_2: ; %Flow ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX9GISEL-NEXT: ; %bb.3: ; %if ; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec @@ -2229,7 +1508,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: s_mul_i32 s6, s6, s4 ; GFX9GISEL-NEXT: s_mul_i32 s7, s7, s4 -; GFX9GISEL-NEXT: .LBB9_4: ; %endif +; GFX9GISEL-NEXT: .LBB5_4: ; %endif ; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2246,7 +1525,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec ; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5] @@ -2254,7 +1533,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: s_mul_i32 s4, s2, s5 ; GFX1064DAGISEL-NEXT: s_mul_i32 s5, s3, s5 -; GFX1064DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9] ; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 @@ -2281,7 +1560,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2 ; 
GFX1064GISEL-NEXT: ; %bb.1: ; %else ; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7] @@ -2289,10 +1568,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mul_i32 s6, s2, s7 ; GFX1064GISEL-NEXT: s_mul_i32 s7, s3, s7 -; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9] -; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1064GISEL-NEXT: ; %bb.3: ; %if ; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec @@ -2301,7 +1580,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s4 ; GFX1064GISEL-NEXT: s_mul_i32 s7, s7, s4 -; GFX1064GISEL-NEXT: .LBB9_4: ; %endif +; GFX1064GISEL-NEXT: .LBB5_4: ; %endif ; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2318,7 +1597,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo ; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4 @@ -2326,7 +1605,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: s_mul_i32 s4, s2, s5 ; GFX1032DAGISEL-NEXT: s_mul_i32 s5, s3, s5 -; GFX1032DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032DAGISEL-NEXT: .LBB5_2: 
; %Flow ; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s2, s8 ; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s4 @@ -2353,7 +1632,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1032GISEL-NEXT: ; %bb.1: ; %else ; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s6, s6 @@ -2361,10 +1640,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mul_i32 s6, s2, s7 ; GFX1032GISEL-NEXT: s_mul_i32 s7, s3, s7 -; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8 -; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1032GISEL-NEXT: ; %bb.3: ; %if ; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo @@ -2373,7 +1652,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032GISEL-NEXT: s_mul_i32 s6, s6, s3 ; GFX1032GISEL-NEXT: s_mul_i32 s7, s7, s3 -; GFX1032GISEL-NEXT: .LBB9_4: ; %endif +; GFX1032GISEL-NEXT: .LBB5_4: ; %endif ; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2392,7 +1671,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 ; GFX1164DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; 
GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1164DAGISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) @@ -2401,7 +1680,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: s_mul_i32 s6, s2, s7 ; GFX1164DAGISEL-NEXT: s_mul_i32 s7, s3, s7 -; GFX1164DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9] ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s6 @@ -2432,7 +1711,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1164GISEL-NEXT: ; %bb.1: ; %else ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec ; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) @@ -2441,10 +1720,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_mul_i32 s6, s2, s7 ; GFX1164GISEL-NEXT: s_mul_i32 s7, s3, s7 -; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9] -; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1164GISEL-NEXT: ; %bb.3: ; %if ; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec @@ -2454,7 +1733,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX1164GISEL-NEXT: s_mul_i32 s6, s4, s7 ; GFX1164GISEL-NEXT: s_mul_i32 s7, s5, s7 -; GFX1164GISEL-NEXT: .LBB9_4: ; %endif +; GFX1164GISEL-NEXT: .LBB5_4: ; %endif ; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7 @@ -2473,7 +1752,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 ; GFX1132DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else ; GFX1132DAGISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) @@ -2482,7 +1761,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: s_mul_i32 s6, s2, s7 ; GFX1132DAGISEL-NEXT: s_mul_i32 s7, s3, s7 -; GFX1132DAGISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132DAGISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s2, s8 ; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 @@ -2512,7 +1791,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 ; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2 ; GFX1132GISEL-NEXT: ; %bb.1: ; %else ; GFX1132GISEL-NEXT: s_mov_b32 s6, exec_lo ; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) @@ -2521,10 +1800,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: 
s_mul_i32 s6, s2, s7 ; GFX1132GISEL-NEXT: s_mul_i32 s7, s3, s7 -; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow +; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8 -; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4 +; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GFX1132GISEL-NEXT: ; %bb.3: ; %if ; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo @@ -2534,7 +1813,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: s_mul_i32 s6, s4, s3 ; GFX1132GISEL-NEXT: s_mul_i32 s7, s5, s3 -; GFX1132GISEL-NEXT: .LBB9_4: ; %endif +; GFX1132GISEL-NEXT: .LBB5_4: ; %endif ; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 ; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0 diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll index 863598578ea77..a27a121a3af61 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll @@ -2,13 +2,14 @@ ; RUN: opt < %s -passes=instsimplify -S | FileCheck %s ; -------------------------------------------------------------------- -; llvm.amdgcn.wave.reduce.umin.i32 +; llvm.amdgcn.wave.reduce.umin ; -------------------------------------------------------------------- declare i32 @llvm.amdgcn.wave.reduce.umin.i32(i32, i32 immarg) +declare i64 @llvm.amdgcn.wave.reduce.umin.i64(i64, i32 immarg) -define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_poison(ptr addrspace(1) %out, i32 %in) { -; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_poison( +define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i32_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: 
@test_constant_fold_wave_reduce_umin_i32_poison( ; CHECK-NEXT: entry: ; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void @@ -19,8 +20,8 @@ entry: ret void } -define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_const(ptr addrspace(1) %out) { -; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_const( +define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i32_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_i32_const( ; CHECK-NEXT: entry: ; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void @@ -31,14 +32,94 @@ entry: ret void } +define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i64_poison(ptr addrspace(1) %out, i64 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_i64_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 poison, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i64_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_i64_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 123, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.wave.reduce.min +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.wave.reduce.min.i32(i32, i32 immarg) +declare i64 @llvm.amdgcn.wave.reduce.min.i64(i64, i32 immarg) + +define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i32_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i32_poison( +; 
CHECK-NEXT: entry: +; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 poison, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i32_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i32_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 123, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i64_poison(ptr addrspace(1) %out, i64 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i64_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 poison, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i64_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i64_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 123, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + ; -------------------------------------------------------------------- -; llvm.amdgcn.wave.reduce.umin.i32 +; llvm.amdgcn.wave.reduce.umax ; -------------------------------------------------------------------- declare i32 @llvm.amdgcn.wave.reduce.umax.i32(i32, i32 immarg) +declare i64 @llvm.amdgcn.wave.reduce.umax.i64(i64, i32 immarg) -define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_poison(ptr addrspace(1) %out, i32 %in) { -; CHECK-LABEL: 
@test_constant_fold_wave_reduce_umax_poison( +define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i32_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i32_poison( ; CHECK-NEXT: entry: ; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void @@ -49,8 +130,8 @@ entry: ret void } -define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_const(ptr addrspace(1) %out) { -; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_const( +define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i32_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i32_const( ; CHECK-NEXT: entry: ; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void @@ -61,6 +142,30 @@ entry: ret void } +define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i64_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i64_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 poison, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i64_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i64_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 123, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + @gv = constant i32 0 define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_gv(ptr addrspace(1) %out) { ; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_gv( @@ -74,3 +179,333 @@ entry: store i32 %result, ptr addrspace(1) %out ret void } + +; 
-------------------------------------------------------------------- +; llvm.amdgcn.wave.reduce.max +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.wave.reduce.max.i32(i32, i32 immarg) +declare i64 @llvm.amdgcn.wave.reduce.max.i64(i64, i32 immarg) + +define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i32_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i32_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 poison, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i32_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i32_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 123, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i64_poison(ptr addrspace(1) %out, i64 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i64_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 poison, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i64_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i64_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 123, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +; 
-------------------------------------------------------------------- +; llvm.amdgcn.wave.reduce.add +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.wave.reduce.add.i32(i32, i32 immarg) +declare i64 @llvm.amdgcn.wave.reduce.add.i64(i64, i32 immarg) + +define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i32_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i32_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 poison, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i32_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i32_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 123, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i64_poison(ptr addrspace(1) %out, i64 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i64_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 poison, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i64_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i64_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 123, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +; 
-------------------------------------------------------------------- +; llvm.amdgcn.wave.reduce.sub +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.wave.reduce.sub.i32(i32, i32 immarg) +declare i64 @llvm.amdgcn.wave.reduce.sub.i64(i64, i32 immarg) + +define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i32_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i32_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 poison, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i32_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i32_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 123, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i64_poison(ptr addrspace(1) %out, i64 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i64_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 poison, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i64_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i64_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 123, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +; 
-------------------------------------------------------------------- +; llvm.amdgcn.wave.reduce.and +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.wave.reduce.and.i32(i32, i32 immarg) +declare i64 @llvm.amdgcn.wave.reduce.and.i64(i64, i32 immarg) + +define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i32_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i32_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 poison, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i32_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i32_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 123, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i64_poison(ptr addrspace(1) %out, i64 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i64_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 poison, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i64_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i64_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 123, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +; 
-------------------------------------------------------------------- +; llvm.amdgcn.wave.reduce.or +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.wave.reduce.or.i32(i32, i32 immarg) +declare i64 @llvm.amdgcn.wave.reduce.or.i64(i64, i32 immarg) + +define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i32_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i32_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 poison, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i32_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i32_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 123, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i64_poison(ptr addrspace(1) %out, i64 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i64_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 poison, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i64_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i64_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 123, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +; 
-------------------------------------------------------------------- +; llvm.amdgcn.wave.reduce.xor +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.wave.reduce.xor.i32(i32, i32 immarg) +declare i64 @llvm.amdgcn.wave.reduce.xor.i64(i64, i32 immarg) + +define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i32_poison(ptr addrspace(1) %out, i32 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i32_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 poison, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i32_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i32_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 123, i32 1) + store i32 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i64_poison(ptr addrspace(1) %out, i64 %in) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i64_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 poison, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i64_const(ptr addrspace(1) %out) { +; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i64_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 123, i32 1) + store i64 %result, ptr addrspace(1) %out + ret void +}