diff --git a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp index bc3ef1c0329a9..e888f290df510 100644 --- a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp @@ -334,10 +334,17 @@ void RenameIndependentSubregs::computeMainRangesFixFlags( DebugLoc(), MCDesc, Reg); SlotIndex DefIdx = LIS->InsertMachineInstrInMaps(*ImpDef); SlotIndex RegDefIdx = DefIdx.getRegSlot(); + LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(Reg); for (LiveInterval::SubRange &SR : LI.subranges()) { + Mask = Mask & ~SR.LaneMask; VNInfo *SRVNI = SR.getNextValue(RegDefIdx, Allocator); SR.addSegment(LiveRange::Segment(RegDefIdx, PredEnd, SRVNI)); } + + if (!Mask.none()) { + LiveInterval::SubRange *SR = LI.createSubRange(Allocator, Mask); + SR->createDeadDef(RegDefIdx, Allocator); + } } } } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll index 47e476de74cf4..6e96a4ddbc0b3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll @@ -144,40 +144,42 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16(i64 %node_ptr, float define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr) { ; GFX1030-LABEL: image_bvh_intersect_ray_vgpr_descr: ; GFX1030: ; %bb.0: -; GFX1030-NEXT: v_mov_b32_e32 v15, v0 -; GFX1030-NEXT: v_mov_b32_e32 v16, v1 -; GFX1030-NEXT: v_mov_b32_e32 v17, v2 -; GFX1030-NEXT: v_mov_b32_e32 v18, v3 -; GFX1030-NEXT: v_mov_b32_e32 v19, v4 -; GFX1030-NEXT: v_mov_b32_e32 v20, v5 -; GFX1030-NEXT: v_mov_b32_e32 v21, v6 -; GFX1030-NEXT: v_mov_b32_e32 v22, v7 -; GFX1030-NEXT: v_mov_b32_e32 v23, v8 -; GFX1030-NEXT: v_mov_b32_e32 v24, v9 -; GFX1030-NEXT: v_mov_b32_e32 v25, v10 +; GFX1030-NEXT: v_mov_b32_e32 v21, v0 +; GFX1030-NEXT: v_mov_b32_e32 v22, v1 +; GFX1030-NEXT: v_mov_b32_e32 v23, v2 +; GFX1030-NEXT: v_mov_b32_e32 v24, v3 +; GFX1030-NEXT: v_mov_b32_e32 v25, v4 +; GFX1030-NEXT: v_mov_b32_e32 v26, v5 +; GFX1030-NEXT: v_mov_b32_e32 v27, v6 +; GFX1030-NEXT: v_mov_b32_e32 v28, v7 +; GFX1030-NEXT: v_mov_b32_e32 v29, v8 +; GFX1030-NEXT: v_mov_b32_e32 v30, v9 +; GFX1030-NEXT: v_mov_b32_e32 v31, v10 +; GFX1030-NEXT: v_mov_b32_e32 v19, v11 +; GFX1030-NEXT: v_mov_b32_e32 v20, v12 ; GFX1030-NEXT: s_mov_b32 s1, exec_lo ; GFX1030-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 -; GFX1030-NEXT: v_readfirstlane_b32 s4, v11 -; GFX1030-NEXT: v_readfirstlane_b32 s5, v12 +; GFX1030-NEXT: v_readfirstlane_b32 s4, v19 +; GFX1030-NEXT: v_readfirstlane_b32 s5, v20 ; GFX1030-NEXT: v_readfirstlane_b32 s6, v13 ; GFX1030-NEXT: v_readfirstlane_b32 s7, v14 -; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12] +; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[19:20] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14] ; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[15:25], s[4:7] -; GFX1030-NEXT: ; implicit-def: $vgpr11 -; GFX1030-NEXT: ; implicit-def: $vgpr15 -; GFX1030-NEXT: ; implicit-def: $vgpr16 -; GFX1030-NEXT: ; implicit-def: $vgpr17 -; GFX1030-NEXT: ; implicit-def: $vgpr18 +; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[21:31], s[4:7] ; GFX1030-NEXT: ; implicit-def: $vgpr19 -; GFX1030-NEXT: ; implicit-def: $vgpr20 ; GFX1030-NEXT: ; implicit-def: $vgpr21 ; GFX1030-NEXT: ; implicit-def: $vgpr22 ; GFX1030-NEXT: ; implicit-def: $vgpr23 ; GFX1030-NEXT: ; implicit-def: $vgpr24 ; GFX1030-NEXT: ; implicit-def: $vgpr25 +; GFX1030-NEXT: ; implicit-def: $vgpr26 +; GFX1030-NEXT: ; implicit-def: $vgpr27 +; GFX1030-NEXT: ; implicit-def: $vgpr28 +; GFX1030-NEXT: ; implicit-def: $vgpr29 +; GFX1030-NEXT: ; implicit-def: $vgpr30 +; GFX1030-NEXT: ; implicit-def: $vgpr31 ; GFX1030-NEXT: ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14 ; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1030-NEXT: s_cbranch_execnz .LBB6_1 @@ -188,18 +190,20 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, ; ; GFX1013-LABEL: image_bvh_intersect_ray_vgpr_descr: ; GFX1013: ; %bb.0: +; GFX1013-NEXT: v_mov_b32_e32 v19, v11 +; GFX1013-NEXT: v_mov_b32_e32 v20, v12 ; GFX1013-NEXT: s_mov_b32 s1, exec_lo ; GFX1013-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 -; GFX1013-NEXT: v_readfirstlane_b32 s4, v11 -; GFX1013-NEXT: v_readfirstlane_b32 s5, v12 +; GFX1013-NEXT: v_readfirstlane_b32 s4, v19 +; GFX1013-NEXT: v_readfirstlane_b32 s5, v20 ; GFX1013-NEXT: v_readfirstlane_b32 s6, v13 ; GFX1013-NEXT: v_readfirstlane_b32 s7, v14 -; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12] +; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[19:20] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14] ; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh_intersect_ray v[15:18], v[0:10], s[4:7] -; GFX1013-NEXT: ; implicit-def: $vgpr11 +; GFX1013-NEXT: ; implicit-def: $vgpr19 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 ; GFX1013-NEXT: ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 @@ -216,25 +220,27 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, ; ; GFX11-LABEL: image_bvh_intersect_ray_vgpr_descr: ; GFX11: ; %bb.0: -; GFX11-NEXT: v_dual_mov_b32 v18, v0 :: v_dual_mov_b32 v19, v1 +; GFX11-NEXT: v_dual_mov_b32 v20, v0 :: v_dual_mov_b32 v21, v1 ; GFX11-NEXT: v_dual_mov_b32 v15, v2 :: v_dual_mov_b32 v16, v3 -; GFX11-NEXT: v_mov_b32_e32 v17, v4 +; GFX11-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v18, v11 +; GFX11-NEXT: v_mov_b32_e32 v19, v12 ; GFX11-NEXT: s_mov_b32 s1, exec_lo ; GFX11-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: v_readfirstlane_b32 s4, v11 -; GFX11-NEXT: v_readfirstlane_b32 s5, v12 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_readfirstlane_b32 s4, v18 +; GFX11-NEXT: v_readfirstlane_b32 s5, v19 ; GFX11-NEXT: v_readfirstlane_b32 s6, v13 ; GFX11-NEXT: v_readfirstlane_b32 s7, v14 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12] +; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[18:19] ; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v18, v19, v[15:17], v[5:7], v[8:10]], s[4:7] -; GFX11-NEXT: ; implicit-def: $vgpr11 +; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v20, v21, v[15:17], v[5:7], v[8:10]], s[4:7] ; GFX11-NEXT: ; implicit-def: $vgpr18 -; GFX11-NEXT: ; implicit-def: $vgpr19 +; GFX11-NEXT: ; implicit-def: $vgpr20 +; GFX11-NEXT: ; implicit-def: $vgpr21 ; GFX11-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17 ; GFX11-NEXT: ; implicit-def: $vgpr5_vgpr6_vgpr7 ; GFX11-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10 @@ -253,39 +259,41 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) { ; GFX1030-LABEL: image_bvh_intersect_ray_a16_vgpr_descr: ; GFX1030: ; %bb.0: -; GFX1030-NEXT: v_mov_b32_e32 v13, v0 -; GFX1030-NEXT: v_mov_b32_e32 v14, v1 +; GFX1030-NEXT: v_mov_b32_e32 v18, v0 +; GFX1030-NEXT: v_mov_b32_e32 v19, v1 ; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v5 ; GFX1030-NEXT: v_and_b32_e32 v1, 0xffff, v7 -; GFX1030-NEXT: v_mov_b32_e32 v15, v2 +; GFX1030-NEXT: v_mov_b32_e32 v20, v2 ; GFX1030-NEXT: v_and_b32_e32 v2, 0xffff, v8 -; GFX1030-NEXT: v_mov_b32_e32 v16, v3 +; GFX1030-NEXT: v_mov_b32_e32 v21, v3 ; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_mov_b32_e32 v17, v4 -; GFX1030-NEXT: v_alignbit_b32 v20, v2, v7, 16 +; GFX1030-NEXT: v_mov_b32_e32 v22, v4 +; GFX1030-NEXT: v_mov_b32_e32 v16, v9 +; GFX1030-NEXT: v_mov_b32_e32 v17, v10 +; GFX1030-NEXT: v_and_or_b32 v23, 0xffff, v5, v0 +; GFX1030-NEXT: v_and_or_b32 v24, 0xffff, v6, v1 +; GFX1030-NEXT: v_alignbit_b32 v25, v2, v7, 16 ; GFX1030-NEXT: s_mov_b32 s1, exec_lo -; GFX1030-NEXT: v_and_or_b32 v18, 0xffff, v5, v0 -; GFX1030-NEXT: v_and_or_b32 v19, 0xffff, v6, v1 ; GFX1030-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GFX1030-NEXT: v_readfirstlane_b32 s4, v9 -; GFX1030-NEXT: v_readfirstlane_b32 s5, v10 +; GFX1030-NEXT: v_readfirstlane_b32 s4, v16 +; GFX1030-NEXT: v_readfirstlane_b32 s5, v17 ; GFX1030-NEXT: v_readfirstlane_b32 s6, v11 ; GFX1030-NEXT: v_readfirstlane_b32 s7, v12 -; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] +; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12] ; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[13:20], s[4:7] a16 -; GFX1030-NEXT: ; implicit-def: $vgpr9 -; GFX1030-NEXT: ; implicit-def: $vgpr13 -; GFX1030-NEXT: ; implicit-def: $vgpr14 -; GFX1030-NEXT: ; implicit-def: $vgpr15 +; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[18:25], s[4:7] a16 ; GFX1030-NEXT: ; implicit-def: $vgpr16 -; GFX1030-NEXT: ; implicit-def: $vgpr17 ; GFX1030-NEXT: ; implicit-def: $vgpr18 ; GFX1030-NEXT: ; implicit-def: $vgpr19 ; GFX1030-NEXT: ; implicit-def: $vgpr20 +; GFX1030-NEXT: ; implicit-def: $vgpr21 +; GFX1030-NEXT: ; implicit-def: $vgpr22 +; GFX1030-NEXT: ; implicit-def: $vgpr23 +; GFX1030-NEXT: ; implicit-def: $vgpr24 +; GFX1030-NEXT: ; implicit-def: $vgpr25 ; GFX1030-NEXT: ; implicit-def: $vgpr9_vgpr10_vgpr11_vgpr12 ; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1030-NEXT: s_cbranch_execnz .LBB7_1 @@ -296,26 +304,28 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p ; ; GFX1013-LABEL: image_bvh_intersect_ray_a16_vgpr_descr: ; GFX1013: ; %bb.0: -; GFX1013-NEXT: v_lshrrev_b32_e32 v13, 16, v5 -; GFX1013-NEXT: v_and_b32_e32 v14, 0xffff, v7 +; GFX1013-NEXT: v_mov_b32_e32 v17, v9 +; GFX1013-NEXT: v_mov_b32_e32 v18, v10 +; GFX1013-NEXT: v_lshrrev_b32_e32 v9, 16, v5 +; GFX1013-NEXT: v_and_b32_e32 v10, 0xffff, v7 ; GFX1013-NEXT: v_and_b32_e32 v8, 0xffff, v8 ; GFX1013-NEXT: s_mov_b32 s1, exec_lo -; GFX1013-NEXT: v_lshlrev_b32_e32 v13, 16, v13 -; GFX1013-NEXT: v_lshlrev_b32_e32 v14, 16, v14 +; GFX1013-NEXT: v_lshlrev_b32_e32 v9, 16, v9 +; GFX1013-NEXT: v_lshlrev_b32_e32 v10, 16, v10 ; GFX1013-NEXT: v_alignbit_b32 v7, v8, v7, 16 -; GFX1013-NEXT: v_and_or_b32 v5, 0xffff, v5, v13 -; GFX1013-NEXT: v_and_or_b32 v6, 0xffff, v6, v14 +; GFX1013-NEXT: v_and_or_b32 v5, 0xffff, v5, v9 +; GFX1013-NEXT: v_and_or_b32 v6, 0xffff, v6, v10 ; GFX1013-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GFX1013-NEXT: v_readfirstlane_b32 s4, v9 -; GFX1013-NEXT: v_readfirstlane_b32 s5, v10 +; GFX1013-NEXT: v_readfirstlane_b32 s4, v17 +; GFX1013-NEXT: v_readfirstlane_b32 s5, v18 ; GFX1013-NEXT: v_readfirstlane_b32 s6, v11 ; GFX1013-NEXT: v_readfirstlane_b32 s7, v12 -; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] +; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[17:18] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12] ; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh_intersect_ray v[13:16], v[0:7], s[4:7] a16 -; GFX1013-NEXT: ; implicit-def: $vgpr9 +; GFX1013-NEXT: ; implicit-def: $vgpr17 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1013-NEXT: ; implicit-def: $vgpr9_vgpr10_vgpr11_vgpr12 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 @@ -333,27 +343,28 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p ; GFX11-LABEL: image_bvh_intersect_ray_a16_vgpr_descr: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v16, v0 :: v_dual_mov_b32 v17, v1 -; GFX11-NEXT: v_dual_mov_b32 v15, v4 :: v_dual_and_b32 v0, 0xffff, v7 +; GFX11-NEXT: v_dual_mov_b32 v19, v10 :: v_dual_and_b32 v0, 0xffff, v7 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v8 ; GFX11-NEXT: v_dual_mov_b32 v13, v2 :: v_dual_mov_b32 v14, v3 -; GFX11-NEXT: s_mov_b32 s1, exec_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-NEXT: v_dual_mov_b32 v15, v4 :: v_dual_mov_b32 v18, v9 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX11-NEXT: v_lshl_or_b32 v4, v5, 16, v0 ; GFX11-NEXT: v_perm_b32 v5, v5, v7, 0x7060302 ; GFX11-NEXT: v_lshl_or_b32 v6, v6, 16, v1 +; GFX11-NEXT: s_mov_b32 s1, exec_lo ; GFX11-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: v_readfirstlane_b32 s4, v9 -; GFX11-NEXT: v_readfirstlane_b32 s5, v10 +; GFX11-NEXT: v_readfirstlane_b32 s4, v18 +; GFX11-NEXT: v_readfirstlane_b32 s5, v19 ; GFX11-NEXT: v_readfirstlane_b32 s6, v11 ; GFX11-NEXT: v_readfirstlane_b32 s7, v12 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] +; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[18:19] ; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 ; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v16, v17, v[13:15], v[4:6]], s[4:7] a16 -; GFX11-NEXT: ; implicit-def: $vgpr9 +; GFX11-NEXT: ; implicit-def: $vgpr18 ; GFX11-NEXT: ; implicit-def: $vgpr16 ; GFX11-NEXT: ; implicit-def: $vgpr17 ; GFX11-NEXT: ; implicit-def: $vgpr13_vgpr14_vgpr15 @@ -373,42 +384,44 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr) { ; GFX1030-LABEL: image_bvh64_intersect_ray_vgpr_descr: ; GFX1030: ; %bb.0: -; GFX1030-NEXT: v_mov_b32_e32 v16, v0 -; GFX1030-NEXT: v_mov_b32_e32 v17, v1 -; GFX1030-NEXT: v_mov_b32_e32 v18, v2 -; GFX1030-NEXT: v_mov_b32_e32 v19, v3 -; GFX1030-NEXT: v_mov_b32_e32 v20, v4 -; GFX1030-NEXT: v_mov_b32_e32 v21, v5 -; GFX1030-NEXT: v_mov_b32_e32 v22, v6 -; GFX1030-NEXT: v_mov_b32_e32 v23, v7 -; GFX1030-NEXT: v_mov_b32_e32 v24, v8 -; GFX1030-NEXT: v_mov_b32_e32 v25, v9 -; GFX1030-NEXT: v_mov_b32_e32 v26, v10 -; GFX1030-NEXT: v_mov_b32_e32 v27, v11 +; GFX1030-NEXT: v_mov_b32_e32 v22, v0 +; GFX1030-NEXT: v_mov_b32_e32 v23, v1 +; GFX1030-NEXT: v_mov_b32_e32 v24, v2 +; GFX1030-NEXT: v_mov_b32_e32 v25, v3 +; GFX1030-NEXT: v_mov_b32_e32 v26, v4 +; GFX1030-NEXT: v_mov_b32_e32 v27, v5 +; GFX1030-NEXT: v_mov_b32_e32 v28, v6 +; GFX1030-NEXT: v_mov_b32_e32 v29, v7 +; GFX1030-NEXT: v_mov_b32_e32 v30, v8 +; GFX1030-NEXT: v_mov_b32_e32 v31, v9 +; GFX1030-NEXT: v_mov_b32_e32 v32, v10 +; GFX1030-NEXT: v_mov_b32_e32 v33, v11 +; GFX1030-NEXT: v_mov_b32_e32 v20, v12 +; GFX1030-NEXT: v_mov_b32_e32 v21, v13 ; GFX1030-NEXT: s_mov_b32 s1, exec_lo ; GFX1030-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1030-NEXT: v_readfirstlane_b32 s4, v12 -; GFX1030-NEXT: v_readfirstlane_b32 s5, v13 +; GFX1030-NEXT: v_readfirstlane_b32 s4, v20 +; GFX1030-NEXT: v_readfirstlane_b32 s5, v21 ; GFX1030-NEXT: v_readfirstlane_b32 s6, v14 ; GFX1030-NEXT: v_readfirstlane_b32 s7, v15 -; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13] +; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[20:21] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15] ; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[16:27], s[4:7] -; GFX1030-NEXT: ; implicit-def: $vgpr12 -; GFX1030-NEXT: ; implicit-def: $vgpr16 -; GFX1030-NEXT: ; implicit-def: $vgpr17 -; GFX1030-NEXT: ; implicit-def: $vgpr18 -; GFX1030-NEXT: ; implicit-def: $vgpr19 +; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[22:33], s[4:7] ; GFX1030-NEXT: ; implicit-def: $vgpr20 -; GFX1030-NEXT: ; implicit-def: $vgpr21 ; GFX1030-NEXT: ; implicit-def: $vgpr22 ; GFX1030-NEXT: ; implicit-def: $vgpr23 ; GFX1030-NEXT: ; implicit-def: $vgpr24 ; GFX1030-NEXT: ; implicit-def: $vgpr25 ; GFX1030-NEXT: ; implicit-def: $vgpr26 ; GFX1030-NEXT: ; implicit-def: $vgpr27 +; GFX1030-NEXT: ; implicit-def: $vgpr28 +; GFX1030-NEXT: ; implicit-def: $vgpr29 +; GFX1030-NEXT: ; implicit-def: $vgpr30 +; GFX1030-NEXT: ; implicit-def: $vgpr31 +; GFX1030-NEXT: ; implicit-def: $vgpr32 +; GFX1030-NEXT: ; implicit-def: $vgpr33 ; GFX1030-NEXT: ; implicit-def: $vgpr12_vgpr13_vgpr14_vgpr15 ; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1030-NEXT: s_cbranch_execnz .LBB8_1 @@ -419,18 +432,20 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr ; ; GFX1013-LABEL: image_bvh64_intersect_ray_vgpr_descr: ; GFX1013: ; %bb.0: +; GFX1013-NEXT: v_mov_b32_e32 v20, v12 +; GFX1013-NEXT: v_mov_b32_e32 v21, v13 ; GFX1013-NEXT: s_mov_b32 s1, exec_lo ; GFX1013-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1013-NEXT: v_readfirstlane_b32 s4, v12 -; GFX1013-NEXT: v_readfirstlane_b32 s5, v13 +; GFX1013-NEXT: v_readfirstlane_b32 s4, v20 +; GFX1013-NEXT: v_readfirstlane_b32 s5, v21 ; GFX1013-NEXT: v_readfirstlane_b32 s6, v14 ; GFX1013-NEXT: v_readfirstlane_b32 s7, v15 -; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13] +; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[20:21] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15] ; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh64_intersect_ray v[16:19], v[0:11], s[4:7] -; GFX1013-NEXT: ; implicit-def: $vgpr12 +; GFX1013-NEXT: ; implicit-def: $vgpr20 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 ; GFX1013-NEXT: ; implicit-def: $vgpr12_vgpr13_vgpr14_vgpr15 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 @@ -450,20 +465,22 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr ; GFX11-NEXT: v_dual_mov_b32 v19, v0 :: v_dual_mov_b32 v20, v1 ; GFX11-NEXT: v_dual_mov_b32 v21, v2 :: v_dual_mov_b32 v16, v3 ; GFX11-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v18, v5 +; GFX11-NEXT: v_dual_mov_b32 v4, v12 :: v_dual_mov_b32 v5, v13 ; GFX11-NEXT: s_mov_b32 s1, exec_lo ; GFX11-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: v_readfirstlane_b32 s4, v12 -; GFX11-NEXT: v_readfirstlane_b32 s5, v13 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_readfirstlane_b32 s4, v4 +; GFX11-NEXT: v_readfirstlane_b32 s5, v5 ; GFX11-NEXT: v_readfirstlane_b32 s6, v14 ; GFX11-NEXT: v_readfirstlane_b32 s7, v15 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13] +; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5] ; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 ; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[19:20], v21, v[16:18], v[6:8], v[9:11]], s[4:7] -; GFX11-NEXT: ; implicit-def: $vgpr12 +; GFX11-NEXT: ; implicit-def: $vgpr4 ; GFX11-NEXT: ; implicit-def: $vgpr19_vgpr20 ; GFX11-NEXT: ; implicit-def: $vgpr21 ; GFX11-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18 @@ -484,41 +501,43 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) { ; GFX1030-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr: ; GFX1030: ; %bb.0: -; GFX1030-NEXT: v_mov_b32_e32 v14, v0 -; GFX1030-NEXT: v_mov_b32_e32 v15, v1 +; GFX1030-NEXT: v_mov_b32_e32 v19, v0 +; GFX1030-NEXT: v_mov_b32_e32 v20, v1 ; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v6 ; GFX1030-NEXT: v_and_b32_e32 v1, 0xffff, v8 -; GFX1030-NEXT: v_mov_b32_e32 v16, v2 +; GFX1030-NEXT: v_mov_b32_e32 v21, v2 ; GFX1030-NEXT: v_and_b32_e32 v2, 0xffff, v9 -; GFX1030-NEXT: v_mov_b32_e32 v17, v3 +; GFX1030-NEXT: v_mov_b32_e32 v22, v3 ; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_mov_b32_e32 v18, v4 -; GFX1030-NEXT: v_mov_b32_e32 v19, v5 -; GFX1030-NEXT: v_alignbit_b32 v22, v2, v8, 16 -; GFX1030-NEXT: v_and_or_b32 v20, 0xffff, v6, v0 -; GFX1030-NEXT: v_and_or_b32 v21, 0xffff, v7, v1 +; GFX1030-NEXT: v_mov_b32_e32 v23, v4 +; GFX1030-NEXT: v_mov_b32_e32 v24, v5 +; GFX1030-NEXT: v_mov_b32_e32 v17, v10 +; GFX1030-NEXT: v_mov_b32_e32 v18, v11 +; GFX1030-NEXT: v_and_or_b32 v25, 0xffff, v6, v0 +; GFX1030-NEXT: v_and_or_b32 v26, 0xffff, v7, v1 +; GFX1030-NEXT: v_alignbit_b32 v27, v2, v8, 16 ; GFX1030-NEXT: s_mov_b32 s1, exec_lo ; GFX1030-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GFX1030-NEXT: v_readfirstlane_b32 s4, v10 -; GFX1030-NEXT: v_readfirstlane_b32 s5, v11 +; GFX1030-NEXT: v_readfirstlane_b32 s4, v17 +; GFX1030-NEXT: v_readfirstlane_b32 s5, v18 ; GFX1030-NEXT: v_readfirstlane_b32 s6, v12 ; GFX1030-NEXT: v_readfirstlane_b32 s7, v13 -; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11] +; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[17:18] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13] ; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[14:22], s[4:7] a16 -; GFX1030-NEXT: ; implicit-def: $vgpr10 -; GFX1030-NEXT: ; implicit-def: $vgpr14 -; GFX1030-NEXT: ; implicit-def: $vgpr15 -; GFX1030-NEXT: ; implicit-def: $vgpr16 +; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[19:27], s[4:7] a16 ; GFX1030-NEXT: ; implicit-def: $vgpr17 -; GFX1030-NEXT: ; implicit-def: $vgpr18 ; GFX1030-NEXT: ; implicit-def: $vgpr19 ; GFX1030-NEXT: ; implicit-def: $vgpr20 ; GFX1030-NEXT: ; implicit-def: $vgpr21 ; GFX1030-NEXT: ; implicit-def: $vgpr22 +; GFX1030-NEXT: ; implicit-def: $vgpr23 +; GFX1030-NEXT: ; implicit-def: $vgpr24 +; GFX1030-NEXT: ; implicit-def: $vgpr25 +; GFX1030-NEXT: ; implicit-def: $vgpr26 +; GFX1030-NEXT: ; implicit-def: $vgpr27 ; GFX1030-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13 ; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1030-NEXT: s_cbranch_execnz .LBB9_1 @@ -529,26 +548,28 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node ; ; GFX1013-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr: ; GFX1013: ; %bb.0: -; GFX1013-NEXT: v_lshrrev_b32_e32 v14, 16, v6 -; GFX1013-NEXT: v_and_b32_e32 v15, 0xffff, v8 +; GFX1013-NEXT: v_mov_b32_e32 v18, v10 +; GFX1013-NEXT: v_mov_b32_e32 v19, v11 +; GFX1013-NEXT: v_lshrrev_b32_e32 v10, 16, v6 +; GFX1013-NEXT: v_and_b32_e32 v11, 0xffff, v8 ; GFX1013-NEXT: v_and_b32_e32 v9, 0xffff, v9 ; GFX1013-NEXT: s_mov_b32 s1, exec_lo -; GFX1013-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX1013-NEXT: v_lshlrev_b32_e32 v15, 16, v15 +; GFX1013-NEXT: v_lshlrev_b32_e32 v10, 16, v10 +; GFX1013-NEXT: v_lshlrev_b32_e32 v11, 16, v11 ; GFX1013-NEXT: v_alignbit_b32 v8, v9, v8, 16 -; GFX1013-NEXT: v_and_or_b32 v6, 0xffff, v6, v14 -; GFX1013-NEXT: v_and_or_b32 v7, 0xffff, v7, v15 +; GFX1013-NEXT: v_and_or_b32 v6, 0xffff, v6, v10 +; GFX1013-NEXT: v_and_or_b32 v7, 0xffff, v7, v11 ; GFX1013-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GFX1013-NEXT: v_readfirstlane_b32 s4, v10 -; GFX1013-NEXT: v_readfirstlane_b32 s5, v11 +; GFX1013-NEXT: v_readfirstlane_b32 s4, v18 +; GFX1013-NEXT: v_readfirstlane_b32 s5, v19 ; GFX1013-NEXT: v_readfirstlane_b32 s6, v12 ; GFX1013-NEXT: v_readfirstlane_b32 s7, v13 -; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11] +; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[18:19] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13] ; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh64_intersect_ray v[14:17], v[0:8], s[4:7] a16 -; GFX1013-NEXT: ; implicit-def: $vgpr10 +; GFX1013-NEXT: ; implicit-def: $vgpr18 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 ; GFX1013-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 @@ -570,28 +591,28 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node ; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v9 ; GFX11-NEXT: v_dual_mov_b32 v19, v2 :: v_dual_mov_b32 v14, v3 ; GFX11-NEXT: v_dual_mov_b32 v15, v4 :: v_dual_mov_b32 v16, v5 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-NEXT: v_lshl_or_b32 v4, v6, 16, v0 -; GFX11-NEXT: v_perm_b32 v5, v6, v8, 0x7060302 -; GFX11-NEXT: v_lshl_or_b32 v6, v7, 16, v1 +; GFX11-NEXT: v_dual_mov_b32 v4, v10 :: v_dual_mov_b32 v5, v11 +; GFX11-NEXT: v_lshl_or_b32 v20, v6, 16, v0 +; GFX11-NEXT: v_perm_b32 v21, v6, v8, 0x7060302 +; GFX11-NEXT: v_lshl_or_b32 v22, v7, 16, v1 ; GFX11-NEXT: s_mov_b32 s1, exec_lo ; GFX11-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: v_readfirstlane_b32 s4, v10 -; GFX11-NEXT: v_readfirstlane_b32 s5, v11 +; GFX11-NEXT: v_readfirstlane_b32 s4, v4 +; GFX11-NEXT: v_readfirstlane_b32 s5, v5 ; GFX11-NEXT: v_readfirstlane_b32 s6, v12 ; GFX11-NEXT: v_readfirstlane_b32 s7, v13 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11] +; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5] ; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[17:18], v19, v[14:16], v[4:6]], s[4:7] a16 -; GFX11-NEXT: ; implicit-def: $vgpr10 +; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[17:18], v19, v[14:16], v[20:22]], s[4:7] a16 +; GFX11-NEXT: ; implicit-def: $vgpr4 ; GFX11-NEXT: ; implicit-def: $vgpr17_vgpr18 ; GFX11-NEXT: ; implicit-def: $vgpr19 ; GFX11-NEXT: ; implicit-def: $vgpr14_vgpr15_vgpr16 -; GFX11-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6 +; GFX11-NEXT: ; implicit-def: $vgpr20_vgpr21_vgpr22 ; GFX11-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13 ; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB9_1 diff --git a/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir b/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir index f990217aeae07..98b1b69101e51 100644 --- a/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir +++ b/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir @@ -23,14 +23,14 @@ body: | ; REG_ALLOC-NEXT: renamable $sgpr6_sgpr7 = V_CMP_NE_U32_e64 killed $vgpr1, 0, implicit $exec ; REG_ALLOC-NEXT: S_CMP_EQ_U64 killed renamable $sgpr12_sgpr13, killed renamable $sgpr2_sgpr3, implicit-def $scc ; REG_ALLOC-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc - ; REG_ALLOC-NEXT: renamable $vgpr0 = IMPLICIT_DEF + ; REG_ALLOC-NEXT: renamable $vgpr8 = IMPLICIT_DEF ; REG_ALLOC-NEXT: $exec = S_MOV_B64_term renamable $sgpr6_sgpr7 ; REG_ALLOC-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec ; REG_ALLOC-NEXT: S_BRANCH %bb.2 ; REG_ALLOC-NEXT: {{ $}} ; REG_ALLOC-NEXT: bb.1: ; REG_ALLOC-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) - ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300 + ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300 ; REG_ALLOC-NEXT: {{ $}} ; REG_ALLOC-NEXT: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec ; REG_ALLOC-NEXT: $exec = S_XOR_B64_term $exec, renamable $sgpr2_sgpr3, implicit-def $scc @@ -42,7 +42,7 @@ body: | ; REG_ALLOC-NEXT: liveins: $sgpr0, $sgpr1, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7 ; REG_ALLOC-NEXT: {{ $}} ; REG_ALLOC-NEXT: renamable $sgpr1 = S_OR_B32 killed renamable $sgpr1, 2, implicit-def dead $scc - ; REG_ALLOC-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr1 + ; REG_ALLOC-NEXT: renamable $vgpr8 = COPY killed renamable $sgpr1 ; REG_ALLOC-NEXT: renamable $vgpr11_vgpr12 = IMPLICIT_DEF ; REG_ALLOC-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; REG_ALLOC-NEXT: S_BRANCH %bb.1 @@ -55,20 +55,20 @@ body: | ; REG_ALLOC-NEXT: renamable $sgpr6 = V_READFIRSTLANE_B32 killed $vgpr4, implicit $exec ; REG_ALLOC-NEXT: S_CMP_EQ_U32 killed renamable $sgpr6, killed renamable $sgpr1, implicit-def $scc ; REG_ALLOC-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc - ; REG_ALLOC-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr1 + ; REG_ALLOC-NEXT: renamable $vgpr8 = COPY killed renamable $sgpr1 ; REG_ALLOC-NEXT: S_BRANCH %bb.5 ; REG_ALLOC-NEXT: {{ $}} ; REG_ALLOC-NEXT: bb.4: - ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr4_sgpr5 + ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr4_sgpr5 ; REG_ALLOC-NEXT: {{ $}} ; REG_ALLOC-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (<4 x s32>), addrspace 4) - ; REG_ALLOC-NEXT: renamable $vgpr1 = V_ADD_U32_e64 killed $sgpr0, killed $vgpr10, 0, implicit $exec - ; REG_ALLOC-NEXT: BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; REG_ALLOC-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $sgpr0, killed $vgpr10, 0, implicit $exec + ; REG_ALLOC-NEXT: BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr8, killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) ; REG_ALLOC-NEXT: S_ENDPGM 0 ; REG_ALLOC-NEXT: {{ $}} ; REG_ALLOC-NEXT: bb.5: ; REG_ALLOC-NEXT: successors: %bb.4(0x80000000) - ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5 + ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5 ; REG_ALLOC-NEXT: {{ $}} ; REG_ALLOC-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc ; REG_ALLOC-NEXT: S_BRANCH %bb.4 @@ -90,14 +90,14 @@ body: | ; DEAD_INST_DEL-NEXT: renamable $sgpr6_sgpr7 = V_CMP_NE_U32_e64 killed $vgpr1, 0, implicit $exec ; DEAD_INST_DEL-NEXT: S_CMP_EQ_U64 killed renamable $sgpr12_sgpr13, killed renamable $sgpr2_sgpr3, implicit-def $scc ; DEAD_INST_DEL-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc - ; DEAD_INST_DEL-NEXT: renamable $vgpr0 = IMPLICIT_DEF + ; DEAD_INST_DEL-NEXT: renamable $vgpr8 = IMPLICIT_DEF ; DEAD_INST_DEL-NEXT: $exec = S_MOV_B64_term renamable $sgpr6_sgpr7 ; DEAD_INST_DEL-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec ; DEAD_INST_DEL-NEXT: S_BRANCH %bb.2 ; DEAD_INST_DEL-NEXT: {{ $}} ; DEAD_INST_DEL-NEXT: bb.1: ; DEAD_INST_DEL-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) - ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300 + ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300 ; DEAD_INST_DEL-NEXT: {{ $}} ; DEAD_INST_DEL-NEXT: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec ; DEAD_INST_DEL-NEXT: $exec = S_XOR_B64_term $exec, renamable $sgpr2_sgpr3, implicit-def $scc @@ -109,6 +109,7 @@ body: | ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $sgpr1, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7 ; DEAD_INST_DEL-NEXT: {{ $}} ; DEAD_INST_DEL-NEXT: renamable $sgpr1 = S_OR_B32 killed renamable $sgpr1, 2, implicit-def dead $scc + ; DEAD_INST_DEL-NEXT: renamable $vgpr8 = COPY killed renamable $sgpr1 ; DEAD_INST_DEL-NEXT: renamable $vgpr11_vgpr12 = IMPLICIT_DEF ; DEAD_INST_DEL-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF ; DEAD_INST_DEL-NEXT: S_BRANCH %bb.1 @@ -121,20 +122,20 @@ body: | ; DEAD_INST_DEL-NEXT: renamable $sgpr6 = V_READFIRSTLANE_B32 killed $vgpr4, implicit $exec ; DEAD_INST_DEL-NEXT: S_CMP_EQ_U32 killed renamable $sgpr6, killed renamable $sgpr1, implicit-def $scc ; DEAD_INST_DEL-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc - ; DEAD_INST_DEL-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr1 + ; DEAD_INST_DEL-NEXT: renamable $vgpr8 = COPY killed renamable $sgpr1 ; DEAD_INST_DEL-NEXT: S_BRANCH %bb.5 ; DEAD_INST_DEL-NEXT: {{ $}} ; DEAD_INST_DEL-NEXT: bb.4: - ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr4_sgpr5 + ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr4_sgpr5 ; DEAD_INST_DEL-NEXT: {{ $}} ; DEAD_INST_DEL-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (<4 x s32>), addrspace 4) - ; DEAD_INST_DEL-NEXT: renamable $vgpr1 = V_ADD_U32_e64 killed $sgpr0, killed $vgpr10, 0, implicit $exec - ; DEAD_INST_DEL-NEXT: BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; DEAD_INST_DEL-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $sgpr0, killed $vgpr10, 0, implicit $exec + ; DEAD_INST_DEL-NEXT: BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr8, killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) ; DEAD_INST_DEL-NEXT: S_ENDPGM 0 ; DEAD_INST_DEL-NEXT: {{ $}} ; DEAD_INST_DEL-NEXT: bb.5: ; DEAD_INST_DEL-NEXT: successors: %bb.4(0x80000000) - ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5 + ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5 ; DEAD_INST_DEL-NEXT: {{ $}} ; DEAD_INST_DEL-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc ; DEAD_INST_DEL-NEXT: S_BRANCH %bb.4