diff --git a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp index 83a9c0d738394..982f31ca78f51 100644 --- a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp @@ -306,6 +306,7 @@ void RenameIndependentSubregs::computeMainRangesFixFlags( const IntEqClasses &Classes, const SmallVectorImpl &SubRangeInfos, const SmallVectorImpl &Intervals) const { + const TargetRegisterInfo &TRI = TII->getRegisterInfo(); BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); const SlotIndexes &Indexes = *LIS->getSlotIndexes(); for (size_t I = 0, E = Intervals.size(); I < E; ++I) { @@ -314,6 +315,25 @@ void RenameIndependentSubregs::computeMainRangesFixFlags( LI.removeEmptySubRanges(); + // Try to establish a single subregister which covers all uses. + // Note: this is assuming the selected subregister will only be + // used for fixing up live intervals issues created by this pass. + LaneBitmask UsedMask, UnusedMask; + for (LiveInterval::SubRange &SR : LI.subranges()) + UsedMask |= SR.LaneMask; + SmallVector SubRegIdxs; + unsigned Flags = 0; + unsigned SubReg = 0; + // TODO: Handle SubRegIdxs.size() > 1 + if (TRI.getCoveringSubRegIndexes(MRI->getRegClass(Reg), UsedMask, + SubRegIdxs) && + SubRegIdxs.size() == 1) { + SubReg = SubRegIdxs.front(); + Flags = RegState::Undef; + } else { + UnusedMask = MRI->getMaxLaneMaskForVReg(Reg) & ~UsedMask; + } + // There must be a def (or live-in) before every use. Splitting vregs may // violate this principle as the splitted vreg may not have a definition on // every path. Fix this by creating IMPLICIT_DEF instruction as necessary. @@ -336,19 +356,18 @@ void RenameIndependentSubregs::computeMainRangesFixFlags( MachineBasicBlock::iterator InsertPos = llvm::findPHICopyInsertPoint(PredMBB, &MBB, Reg); const MCInstrDesc &MCDesc = TII->get(TargetOpcode::IMPLICIT_DEF); - MachineInstrBuilder ImpDef = BuildMI(*PredMBB, InsertPos, - DebugLoc(), MCDesc, Reg); + MachineInstrBuilder ImpDef = + BuildMI(*PredMBB, InsertPos, DebugLoc(), MCDesc) + .addDef(Reg, Flags, SubReg); SlotIndex DefIdx = LIS->InsertMachineInstrInMaps(*ImpDef); SlotIndex RegDefIdx = DefIdx.getRegSlot(); - LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(Reg); for (LiveInterval::SubRange &SR : LI.subranges()) { - Mask = Mask & ~SR.LaneMask; VNInfo *SRVNI = SR.getNextValue(RegDefIdx, Allocator); SR.addSegment(LiveRange::Segment(RegDefIdx, PredEnd, SRVNI)); } - - if (!Mask.none()) { - LiveInterval::SubRange *SR = LI.createSubRange(Allocator, Mask); + if (!UnusedMask.none()) { + LiveInterval::SubRange *SR = + LI.createSubRange(Allocator, UnusedMask); SR->createDeadDef(RegDefIdx, Allocator); } } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll index b0ca1e8ef3dff..cbf17bd71a69e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll @@ -144,43 +144,41 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16(i64 %node_ptr, float define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr) { ; GFX1030-LABEL: image_bvh_intersect_ray_vgpr_descr: ; GFX1030: ; %bb.0: -; GFX1030-NEXT: v_mov_b32_e32 v21, v0 -; GFX1030-NEXT: v_mov_b32_e32 v22, v1 -; GFX1030-NEXT: v_mov_b32_e32 v23, v2 -; GFX1030-NEXT: v_mov_b32_e32 v24, v3 -; GFX1030-NEXT: v_mov_b32_e32 v25, v4 -; GFX1030-NEXT: v_mov_b32_e32 v26, v5 -; GFX1030-NEXT: v_mov_b32_e32 v27, v6 -; GFX1030-NEXT: v_mov_b32_e32 v28, v7 -; GFX1030-NEXT: v_mov_b32_e32 v29, v8 -; GFX1030-NEXT: v_mov_b32_e32 v30, v9 -; GFX1030-NEXT: v_mov_b32_e32 v31, v10 -; GFX1030-NEXT: v_mov_b32_e32 v19, v11 -; GFX1030-NEXT: v_mov_b32_e32 v20, v12 +; GFX1030-NEXT: v_mov_b32_e32 v15, v0 +; GFX1030-NEXT: v_mov_b32_e32 v16, v1 +; GFX1030-NEXT: v_mov_b32_e32 v17, v2 +; GFX1030-NEXT: v_mov_b32_e32 v18, v3 +; GFX1030-NEXT: v_mov_b32_e32 v19, v4 +; GFX1030-NEXT: v_mov_b32_e32 v20, v5 +; GFX1030-NEXT: v_mov_b32_e32 v21, v6 +; GFX1030-NEXT: v_mov_b32_e32 v22, v7 +; GFX1030-NEXT: v_mov_b32_e32 v23, v8 +; GFX1030-NEXT: v_mov_b32_e32 v24, v9 +; GFX1030-NEXT: v_mov_b32_e32 v25, v10 ; GFX1030-NEXT: s_mov_b32 s1, exec_lo ; GFX1030-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 -; GFX1030-NEXT: v_readfirstlane_b32 s4, v19 -; GFX1030-NEXT: v_readfirstlane_b32 s5, v20 +; GFX1030-NEXT: v_readfirstlane_b32 s4, v11 +; GFX1030-NEXT: v_readfirstlane_b32 s5, v12 ; GFX1030-NEXT: v_readfirstlane_b32 s6, v13 ; GFX1030-NEXT: v_readfirstlane_b32 s7, v14 -; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[19:20] +; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14] ; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[21:31], s[4:7] +; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[15:25], s[4:7] +; GFX1030-NEXT: ; implicit-def: $vgpr11 +; GFX1030-NEXT: ; implicit-def: $vgpr15 +; GFX1030-NEXT: ; implicit-def: $vgpr16 +; GFX1030-NEXT: ; implicit-def: $vgpr17 +; GFX1030-NEXT: ; implicit-def: $vgpr18 ; GFX1030-NEXT: ; implicit-def: $vgpr19 +; GFX1030-NEXT: ; implicit-def: $vgpr20 ; GFX1030-NEXT: ; implicit-def: $vgpr21 ; GFX1030-NEXT: ; implicit-def: $vgpr22 ; GFX1030-NEXT: ; implicit-def: $vgpr23 ; GFX1030-NEXT: ; implicit-def: $vgpr24 ; GFX1030-NEXT: ; implicit-def: $vgpr25 -; GFX1030-NEXT: ; implicit-def: $vgpr26 -; GFX1030-NEXT: ; implicit-def: $vgpr27 -; GFX1030-NEXT: ; implicit-def: $vgpr28 -; GFX1030-NEXT: ; implicit-def: $vgpr29 -; GFX1030-NEXT: ; implicit-def: $vgpr30 -; GFX1030-NEXT: ; implicit-def: $vgpr31 -; GFX1030-NEXT: ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14 +; GFX1030-NEXT: ; implicit-def: $vgpr13_vgpr14 ; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1030-NEXT: s_cbranch_execnz .LBB6_1 ; GFX1030-NEXT: ; %bb.2: @@ -190,22 +188,20 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, ; ; GFX1013-LABEL: image_bvh_intersect_ray_vgpr_descr: ; GFX1013: ; %bb.0: -; GFX1013-NEXT: v_mov_b32_e32 v19, v11 -; GFX1013-NEXT: v_mov_b32_e32 v20, v12 ; GFX1013-NEXT: s_mov_b32 s1, exec_lo ; GFX1013-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 -; GFX1013-NEXT: v_readfirstlane_b32 s4, v19 -; GFX1013-NEXT: v_readfirstlane_b32 s5, v20 +; GFX1013-NEXT: v_readfirstlane_b32 s4, v11 +; GFX1013-NEXT: v_readfirstlane_b32 s5, v12 ; GFX1013-NEXT: v_readfirstlane_b32 s6, v13 ; GFX1013-NEXT: v_readfirstlane_b32 s7, v14 -; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[19:20] +; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14] ; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh_intersect_ray v[15:18], v[0:10], s[4:7] -; GFX1013-NEXT: ; implicit-def: $vgpr19 +; GFX1013-NEXT: ; implicit-def: $vgpr11 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 -; GFX1013-NEXT: ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14 +; GFX1013-NEXT: ; implicit-def: $vgpr13_vgpr14 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1013-NEXT: s_cbranch_execnz .LBB6_1 @@ -220,31 +216,29 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, ; ; GFX11-LABEL: image_bvh_intersect_ray_vgpr_descr: ; GFX11: ; %bb.0: -; GFX11-NEXT: v_dual_mov_b32 v20, v0 :: v_dual_mov_b32 v21, v1 +; GFX11-NEXT: v_dual_mov_b32 v18, v0 :: v_dual_mov_b32 v19, v1 ; GFX11-NEXT: v_dual_mov_b32 v15, v2 :: v_dual_mov_b32 v16, v3 -; GFX11-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v18, v11 -; GFX11-NEXT: v_mov_b32_e32 v19, v12 +; GFX11-NEXT: v_mov_b32_e32 v17, v4 ; GFX11-NEXT: s_mov_b32 s1, exec_lo ; GFX11-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_readfirstlane_b32 s4, v18 -; GFX11-NEXT: v_readfirstlane_b32 s5, v19 +; GFX11-NEXT: v_readfirstlane_b32 s4, v11 +; GFX11-NEXT: v_readfirstlane_b32 s5, v12 ; GFX11-NEXT: v_readfirstlane_b32 s6, v13 ; GFX11-NEXT: v_readfirstlane_b32 s7, v14 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[18:19] +; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12] ; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14] ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v20, v21, v[15:17], v[5:7], v[8:10]], s[4:7] +; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v18, v19, v[15:17], v[5:7], v[8:10]], s[4:7] +; GFX11-NEXT: ; implicit-def: $vgpr11 ; GFX11-NEXT: ; implicit-def: $vgpr18 -; GFX11-NEXT: ; implicit-def: $vgpr20 -; GFX11-NEXT: ; implicit-def: $vgpr21 +; GFX11-NEXT: ; implicit-def: $vgpr19 ; GFX11-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17 ; GFX11-NEXT: ; implicit-def: $vgpr5_vgpr6_vgpr7 ; GFX11-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10 -; GFX11-NEXT: ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14 +; GFX11-NEXT: ; implicit-def: $vgpr13_vgpr14 ; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB6_1 ; GFX11-NEXT: ; %bb.2: @@ -259,42 +253,40 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) { ; GFX1030-LABEL: image_bvh_intersect_ray_a16_vgpr_descr: ; GFX1030: ; %bb.0: -; GFX1030-NEXT: v_mov_b32_e32 v18, v0 -; GFX1030-NEXT: v_mov_b32_e32 v19, v1 +; GFX1030-NEXT: v_mov_b32_e32 v13, v0 +; GFX1030-NEXT: v_mov_b32_e32 v14, v1 ; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v5 ; GFX1030-NEXT: v_and_b32_e32 v1, 0xffff, v7 -; GFX1030-NEXT: v_mov_b32_e32 v20, v2 +; GFX1030-NEXT: v_mov_b32_e32 v15, v2 ; GFX1030-NEXT: v_and_b32_e32 v2, 0xffff, v8 -; GFX1030-NEXT: v_mov_b32_e32 v21, v3 +; GFX1030-NEXT: v_mov_b32_e32 v16, v3 ; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_mov_b32_e32 v22, v4 -; GFX1030-NEXT: v_mov_b32_e32 v16, v9 -; GFX1030-NEXT: v_mov_b32_e32 v17, v10 -; GFX1030-NEXT: v_and_or_b32 v23, 0xffff, v5, v0 -; GFX1030-NEXT: v_and_or_b32 v24, 0xffff, v6, v1 -; GFX1030-NEXT: v_alignbit_b32 v25, v2, v7, 16 +; GFX1030-NEXT: v_mov_b32_e32 v17, v4 +; GFX1030-NEXT: v_alignbit_b32 v20, v2, v7, 16 ; GFX1030-NEXT: s_mov_b32 s1, exec_lo +; GFX1030-NEXT: v_and_or_b32 v18, 0xffff, v5, v0 +; GFX1030-NEXT: v_and_or_b32 v19, 0xffff, v6, v1 ; GFX1030-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GFX1030-NEXT: v_readfirstlane_b32 s4, v16 -; GFX1030-NEXT: v_readfirstlane_b32 s5, v17 +; GFX1030-NEXT: v_readfirstlane_b32 s4, v9 +; GFX1030-NEXT: v_readfirstlane_b32 s5, v10 ; GFX1030-NEXT: v_readfirstlane_b32 s6, v11 ; GFX1030-NEXT: v_readfirstlane_b32 s7, v12 -; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17] +; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12] ; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[18:25], s[4:7] a16 +; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[13:20], s[4:7] a16 +; GFX1030-NEXT: ; implicit-def: $vgpr9 +; GFX1030-NEXT: ; implicit-def: $vgpr13 +; GFX1030-NEXT: ; implicit-def: $vgpr14 +; GFX1030-NEXT: ; implicit-def: $vgpr15 ; GFX1030-NEXT: ; implicit-def: $vgpr16 +; GFX1030-NEXT: ; implicit-def: $vgpr17 ; GFX1030-NEXT: ; implicit-def: $vgpr18 ; GFX1030-NEXT: ; implicit-def: $vgpr19 ; GFX1030-NEXT: ; implicit-def: $vgpr20 -; GFX1030-NEXT: ; implicit-def: $vgpr21 -; GFX1030-NEXT: ; implicit-def: $vgpr22 -; GFX1030-NEXT: ; implicit-def: $vgpr23 -; GFX1030-NEXT: ; implicit-def: $vgpr24 -; GFX1030-NEXT: ; implicit-def: $vgpr25 -; GFX1030-NEXT: ; implicit-def: $vgpr9_vgpr10_vgpr11_vgpr12 +; GFX1030-NEXT: ; implicit-def: $vgpr11_vgpr12 ; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1030-NEXT: s_cbranch_execnz .LBB7_1 ; GFX1030-NEXT: ; %bb.2: @@ -304,30 +296,28 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p ; ; GFX1013-LABEL: image_bvh_intersect_ray_a16_vgpr_descr: ; GFX1013: ; %bb.0: -; GFX1013-NEXT: v_mov_b32_e32 v17, v9 -; GFX1013-NEXT: v_mov_b32_e32 v18, v10 -; GFX1013-NEXT: v_lshrrev_b32_e32 v9, 16, v5 -; GFX1013-NEXT: v_and_b32_e32 v10, 0xffff, v7 +; GFX1013-NEXT: v_lshrrev_b32_e32 v13, 16, v5 +; GFX1013-NEXT: v_and_b32_e32 v14, 0xffff, v7 ; GFX1013-NEXT: v_and_b32_e32 v8, 0xffff, v8 ; GFX1013-NEXT: s_mov_b32 s1, exec_lo -; GFX1013-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX1013-NEXT: v_lshlrev_b32_e32 v10, 16, v10 +; GFX1013-NEXT: v_lshlrev_b32_e32 v13, 16, v13 +; GFX1013-NEXT: v_lshlrev_b32_e32 v14, 16, v14 ; GFX1013-NEXT: v_alignbit_b32 v7, v8, v7, 16 -; GFX1013-NEXT: v_and_or_b32 v5, 0xffff, v5, v9 -; GFX1013-NEXT: v_and_or_b32 v6, 0xffff, v6, v10 +; GFX1013-NEXT: v_and_or_b32 v5, 0xffff, v5, v13 +; GFX1013-NEXT: v_and_or_b32 v6, 0xffff, v6, v14 ; GFX1013-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GFX1013-NEXT: v_readfirstlane_b32 s4, v17 -; GFX1013-NEXT: v_readfirstlane_b32 s5, v18 +; GFX1013-NEXT: v_readfirstlane_b32 s4, v9 +; GFX1013-NEXT: v_readfirstlane_b32 s5, v10 ; GFX1013-NEXT: v_readfirstlane_b32 s6, v11 ; GFX1013-NEXT: v_readfirstlane_b32 s7, v12 -; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[17:18] +; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12] ; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh_intersect_ray v[13:16], v[0:7], s[4:7] a16 -; GFX1013-NEXT: ; implicit-def: $vgpr17 +; GFX1013-NEXT: ; implicit-def: $vgpr9 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 -; GFX1013-NEXT: ; implicit-def: $vgpr9_vgpr10_vgpr11_vgpr12 +; GFX1013-NEXT: ; implicit-def: $vgpr11_vgpr12 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1013-NEXT: s_cbranch_execnz .LBB7_1 @@ -343,33 +333,32 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p ; GFX11-LABEL: image_bvh_intersect_ray_a16_vgpr_descr: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v16, v0 :: v_dual_mov_b32 v17, v1 -; GFX11-NEXT: v_dual_mov_b32 v19, v10 :: v_dual_and_b32 v0, 0xffff, v7 +; GFX11-NEXT: v_dual_mov_b32 v15, v4 :: v_dual_and_b32 v0, 0xffff, v7 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v8 ; GFX11-NEXT: v_dual_mov_b32 v13, v2 :: v_dual_mov_b32 v14, v3 -; GFX11-NEXT: v_dual_mov_b32 v15, v4 :: v_dual_mov_b32 v18, v9 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-NEXT: s_mov_b32 s1, exec_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-NEXT: v_lshl_or_b32 v4, v5, 16, v0 ; GFX11-NEXT: v_perm_b32 v5, v5, v7, 0x7060302 ; GFX11-NEXT: v_lshl_or_b32 v6, v6, 16, v1 -; GFX11-NEXT: s_mov_b32 s1, exec_lo ; GFX11-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: v_readfirstlane_b32 s4, v18 -; GFX11-NEXT: v_readfirstlane_b32 s5, v19 +; GFX11-NEXT: v_readfirstlane_b32 s4, v9 +; GFX11-NEXT: v_readfirstlane_b32 s5, v10 ; GFX11-NEXT: v_readfirstlane_b32 s6, v11 ; GFX11-NEXT: v_readfirstlane_b32 s7, v12 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[18:19] +; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] ; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12] ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 ; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v16, v17, v[13:15], v[4:6]], s[4:7] a16 -; GFX11-NEXT: ; implicit-def: $vgpr18 +; GFX11-NEXT: ; implicit-def: $vgpr9 ; GFX11-NEXT: ; implicit-def: $vgpr16 ; GFX11-NEXT: ; implicit-def: $vgpr17 ; GFX11-NEXT: ; implicit-def: $vgpr13_vgpr14_vgpr15 ; GFX11-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6 -; GFX11-NEXT: ; implicit-def: $vgpr9_vgpr10_vgpr11_vgpr12 +; GFX11-NEXT: ; implicit-def: $vgpr11_vgpr12 ; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB7_1 ; GFX11-NEXT: ; %bb.2: @@ -384,45 +373,43 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr) { ; GFX1030-LABEL: image_bvh64_intersect_ray_vgpr_descr: ; GFX1030: ; %bb.0: -; GFX1030-NEXT: v_mov_b32_e32 v22, v0 -; GFX1030-NEXT: v_mov_b32_e32 v23, v1 -; GFX1030-NEXT: v_mov_b32_e32 v24, v2 -; GFX1030-NEXT: v_mov_b32_e32 v25, v3 -; GFX1030-NEXT: v_mov_b32_e32 v26, v4 -; GFX1030-NEXT: v_mov_b32_e32 v27, v5 -; GFX1030-NEXT: v_mov_b32_e32 v28, v6 -; GFX1030-NEXT: v_mov_b32_e32 v29, v7 -; GFX1030-NEXT: v_mov_b32_e32 v30, v8 -; GFX1030-NEXT: v_mov_b32_e32 v31, v9 -; GFX1030-NEXT: v_mov_b32_e32 v32, v10 -; GFX1030-NEXT: v_mov_b32_e32 v33, v11 -; GFX1030-NEXT: v_mov_b32_e32 v20, v12 -; GFX1030-NEXT: v_mov_b32_e32 v21, v13 +; GFX1030-NEXT: v_mov_b32_e32 v16, v0 +; GFX1030-NEXT: v_mov_b32_e32 v17, v1 +; GFX1030-NEXT: v_mov_b32_e32 v18, v2 +; GFX1030-NEXT: v_mov_b32_e32 v19, v3 +; GFX1030-NEXT: v_mov_b32_e32 v20, v4 +; GFX1030-NEXT: v_mov_b32_e32 v21, v5 +; GFX1030-NEXT: v_mov_b32_e32 v22, v6 +; GFX1030-NEXT: v_mov_b32_e32 v23, v7 +; GFX1030-NEXT: v_mov_b32_e32 v24, v8 +; GFX1030-NEXT: v_mov_b32_e32 v25, v9 +; GFX1030-NEXT: v_mov_b32_e32 v26, v10 +; GFX1030-NEXT: v_mov_b32_e32 v27, v11 ; GFX1030-NEXT: s_mov_b32 s1, exec_lo ; GFX1030-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1030-NEXT: v_readfirstlane_b32 s4, v20 -; GFX1030-NEXT: v_readfirstlane_b32 s5, v21 +; GFX1030-NEXT: v_readfirstlane_b32 s4, v12 +; GFX1030-NEXT: v_readfirstlane_b32 s5, v13 ; GFX1030-NEXT: v_readfirstlane_b32 s6, v14 ; GFX1030-NEXT: v_readfirstlane_b32 s7, v15 -; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[20:21] +; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15] ; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[22:33], s[4:7] +; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[16:27], s[4:7] +; GFX1030-NEXT: ; implicit-def: $vgpr12 +; GFX1030-NEXT: ; implicit-def: $vgpr16 +; GFX1030-NEXT: ; implicit-def: $vgpr17 +; GFX1030-NEXT: ; implicit-def: $vgpr18 +; GFX1030-NEXT: ; implicit-def: $vgpr19 ; GFX1030-NEXT: ; implicit-def: $vgpr20 +; GFX1030-NEXT: ; implicit-def: $vgpr21 ; GFX1030-NEXT: ; implicit-def: $vgpr22 ; GFX1030-NEXT: ; implicit-def: $vgpr23 ; GFX1030-NEXT: ; implicit-def: $vgpr24 ; GFX1030-NEXT: ; implicit-def: $vgpr25 ; GFX1030-NEXT: ; implicit-def: $vgpr26 ; GFX1030-NEXT: ; implicit-def: $vgpr27 -; GFX1030-NEXT: ; implicit-def: $vgpr28 -; GFX1030-NEXT: ; implicit-def: $vgpr29 -; GFX1030-NEXT: ; implicit-def: $vgpr30 -; GFX1030-NEXT: ; implicit-def: $vgpr31 -; GFX1030-NEXT: ; implicit-def: $vgpr32 -; GFX1030-NEXT: ; implicit-def: $vgpr33 -; GFX1030-NEXT: ; implicit-def: $vgpr12_vgpr13_vgpr14_vgpr15 +; GFX1030-NEXT: ; implicit-def: $vgpr14_vgpr15 ; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1030-NEXT: s_cbranch_execnz .LBB8_1 ; GFX1030-NEXT: ; %bb.2: @@ -432,22 +419,20 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr ; ; GFX1013-LABEL: image_bvh64_intersect_ray_vgpr_descr: ; GFX1013: ; %bb.0: -; GFX1013-NEXT: v_mov_b32_e32 v20, v12 -; GFX1013-NEXT: v_mov_b32_e32 v21, v13 ; GFX1013-NEXT: s_mov_b32 s1, exec_lo ; GFX1013-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1013-NEXT: v_readfirstlane_b32 s4, v20 -; GFX1013-NEXT: v_readfirstlane_b32 s5, v21 +; GFX1013-NEXT: v_readfirstlane_b32 s4, v12 +; GFX1013-NEXT: v_readfirstlane_b32 s5, v13 ; GFX1013-NEXT: v_readfirstlane_b32 s6, v14 ; GFX1013-NEXT: v_readfirstlane_b32 s7, v15 -; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[20:21] +; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15] ; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh64_intersect_ray v[16:19], v[0:11], s[4:7] -; GFX1013-NEXT: ; implicit-def: $vgpr20 +; GFX1013-NEXT: ; implicit-def: $vgpr12 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 -; GFX1013-NEXT: ; implicit-def: $vgpr12_vgpr13_vgpr14_vgpr15 +; GFX1013-NEXT: ; implicit-def: $vgpr14_vgpr15 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1013-NEXT: s_cbranch_execnz .LBB8_1 @@ -465,28 +450,26 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr ; GFX11-NEXT: v_dual_mov_b32 v19, v0 :: v_dual_mov_b32 v20, v1 ; GFX11-NEXT: v_dual_mov_b32 v21, v2 :: v_dual_mov_b32 v16, v3 ; GFX11-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v18, v5 -; GFX11-NEXT: v_dual_mov_b32 v4, v12 :: v_dual_mov_b32 v5, v13 ; GFX11-NEXT: s_mov_b32 s1, exec_lo ; GFX11-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_readfirstlane_b32 s4, v4 -; GFX11-NEXT: v_readfirstlane_b32 s5, v5 +; GFX11-NEXT: v_readfirstlane_b32 s4, v12 +; GFX11-NEXT: v_readfirstlane_b32 s5, v13 ; GFX11-NEXT: v_readfirstlane_b32 s6, v14 ; GFX11-NEXT: v_readfirstlane_b32 s7, v15 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5] +; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13] ; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15] ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 ; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[19:20], v21, v[16:18], v[6:8], v[9:11]], s[4:7] -; GFX11-NEXT: ; implicit-def: $vgpr4 +; GFX11-NEXT: ; implicit-def: $vgpr12 ; GFX11-NEXT: ; implicit-def: $vgpr19_vgpr20 ; GFX11-NEXT: ; implicit-def: $vgpr21 ; GFX11-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18 ; GFX11-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8 ; GFX11-NEXT: ; implicit-def: $vgpr9_vgpr10_vgpr11 -; GFX11-NEXT: ; implicit-def: $vgpr12_vgpr13_vgpr14_vgpr15 +; GFX11-NEXT: ; implicit-def: $vgpr14_vgpr15 ; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB8_1 ; GFX11-NEXT: ; %bb.2: @@ -501,44 +484,42 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) { ; GFX1030-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr: ; GFX1030: ; %bb.0: -; GFX1030-NEXT: v_mov_b32_e32 v19, v0 -; GFX1030-NEXT: v_mov_b32_e32 v20, v1 +; GFX1030-NEXT: v_mov_b32_e32 v14, v0 +; GFX1030-NEXT: v_mov_b32_e32 v15, v1 ; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v6 ; GFX1030-NEXT: v_and_b32_e32 v1, 0xffff, v8 -; GFX1030-NEXT: v_mov_b32_e32 v21, v2 +; GFX1030-NEXT: v_mov_b32_e32 v16, v2 ; GFX1030-NEXT: v_and_b32_e32 v2, 0xffff, v9 -; GFX1030-NEXT: v_mov_b32_e32 v22, v3 +; GFX1030-NEXT: v_mov_b32_e32 v17, v3 ; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_mov_b32_e32 v23, v4 -; GFX1030-NEXT: v_mov_b32_e32 v24, v5 -; GFX1030-NEXT: v_mov_b32_e32 v17, v10 -; GFX1030-NEXT: v_mov_b32_e32 v18, v11 -; GFX1030-NEXT: v_and_or_b32 v25, 0xffff, v6, v0 -; GFX1030-NEXT: v_and_or_b32 v26, 0xffff, v7, v1 -; GFX1030-NEXT: v_alignbit_b32 v27, v2, v8, 16 +; GFX1030-NEXT: v_mov_b32_e32 v18, v4 +; GFX1030-NEXT: v_mov_b32_e32 v19, v5 +; GFX1030-NEXT: v_alignbit_b32 v22, v2, v8, 16 +; GFX1030-NEXT: v_and_or_b32 v20, 0xffff, v6, v0 +; GFX1030-NEXT: v_and_or_b32 v21, 0xffff, v7, v1 ; GFX1030-NEXT: s_mov_b32 s1, exec_lo ; GFX1030-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GFX1030-NEXT: v_readfirstlane_b32 s4, v17 -; GFX1030-NEXT: v_readfirstlane_b32 s5, v18 +; GFX1030-NEXT: v_readfirstlane_b32 s4, v10 +; GFX1030-NEXT: v_readfirstlane_b32 s5, v11 ; GFX1030-NEXT: v_readfirstlane_b32 s6, v12 ; GFX1030-NEXT: v_readfirstlane_b32 s7, v13 -; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[17:18] +; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11] ; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13] ; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[19:27], s[4:7] a16 +; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[14:22], s[4:7] a16 +; GFX1030-NEXT: ; implicit-def: $vgpr10 +; GFX1030-NEXT: ; implicit-def: $vgpr14 +; GFX1030-NEXT: ; implicit-def: $vgpr15 +; GFX1030-NEXT: ; implicit-def: $vgpr16 ; GFX1030-NEXT: ; implicit-def: $vgpr17 +; GFX1030-NEXT: ; implicit-def: $vgpr18 ; GFX1030-NEXT: ; implicit-def: $vgpr19 ; GFX1030-NEXT: ; implicit-def: $vgpr20 ; GFX1030-NEXT: ; implicit-def: $vgpr21 ; GFX1030-NEXT: ; implicit-def: $vgpr22 -; GFX1030-NEXT: ; implicit-def: $vgpr23 -; GFX1030-NEXT: ; implicit-def: $vgpr24 -; GFX1030-NEXT: ; implicit-def: $vgpr25 -; GFX1030-NEXT: ; implicit-def: $vgpr26 -; GFX1030-NEXT: ; implicit-def: $vgpr27 -; GFX1030-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13 +; GFX1030-NEXT: ; implicit-def: $vgpr12_vgpr13 ; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1030-NEXT: s_cbranch_execnz .LBB9_1 ; GFX1030-NEXT: ; %bb.2: @@ -548,30 +529,28 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node ; ; GFX1013-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr: ; GFX1013: ; %bb.0: -; GFX1013-NEXT: v_mov_b32_e32 v18, v10 -; GFX1013-NEXT: v_mov_b32_e32 v19, v11 -; GFX1013-NEXT: v_lshrrev_b32_e32 v10, 16, v6 -; GFX1013-NEXT: v_and_b32_e32 v11, 0xffff, v8 +; GFX1013-NEXT: v_lshrrev_b32_e32 v14, 16, v6 +; GFX1013-NEXT: v_and_b32_e32 v15, 0xffff, v8 ; GFX1013-NEXT: v_and_b32_e32 v9, 0xffff, v9 ; GFX1013-NEXT: s_mov_b32 s1, exec_lo -; GFX1013-NEXT: v_lshlrev_b32_e32 v10, 16, v10 -; GFX1013-NEXT: v_lshlrev_b32_e32 v11, 16, v11 +; GFX1013-NEXT: v_lshlrev_b32_e32 v14, 16, v14 +; GFX1013-NEXT: v_lshlrev_b32_e32 v15, 16, v15 ; GFX1013-NEXT: v_alignbit_b32 v8, v9, v8, 16 -; GFX1013-NEXT: v_and_or_b32 v6, 0xffff, v6, v10 -; GFX1013-NEXT: v_and_or_b32 v7, 0xffff, v7, v11 +; GFX1013-NEXT: v_and_or_b32 v6, 0xffff, v6, v14 +; GFX1013-NEXT: v_and_or_b32 v7, 0xffff, v7, v15 ; GFX1013-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GFX1013-NEXT: v_readfirstlane_b32 s4, v18 -; GFX1013-NEXT: v_readfirstlane_b32 s5, v19 +; GFX1013-NEXT: v_readfirstlane_b32 s4, v10 +; GFX1013-NEXT: v_readfirstlane_b32 s5, v11 ; GFX1013-NEXT: v_readfirstlane_b32 s6, v12 ; GFX1013-NEXT: v_readfirstlane_b32 s7, v13 -; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[18:19] +; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11] ; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13] ; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 ; GFX1013-NEXT: image_bvh64_intersect_ray v[14:17], v[0:8], s[4:7] a16 -; GFX1013-NEXT: ; implicit-def: $vgpr18 +; GFX1013-NEXT: ; implicit-def: $vgpr10 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 -; GFX1013-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13 +; GFX1013-NEXT: ; implicit-def: $vgpr12_vgpr13 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1013-NEXT: s_cbranch_execnz .LBB9_1 @@ -591,29 +570,29 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node ; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v9 ; GFX11-NEXT: v_dual_mov_b32 v19, v2 :: v_dual_mov_b32 v14, v3 ; GFX11-NEXT: v_dual_mov_b32 v15, v4 :: v_dual_mov_b32 v16, v5 -; GFX11-NEXT: v_dual_mov_b32 v4, v10 :: v_dual_mov_b32 v5, v11 -; GFX11-NEXT: v_lshl_or_b32 v20, v6, 16, v0 -; GFX11-NEXT: v_perm_b32 v21, v6, v8, 0x7060302 -; GFX11-NEXT: v_lshl_or_b32 v22, v7, 16, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-NEXT: v_lshl_or_b32 v4, v6, 16, v0 +; GFX11-NEXT: v_perm_b32 v5, v6, v8, 0x7060302 +; GFX11-NEXT: v_lshl_or_b32 v6, v7, 16, v1 ; GFX11-NEXT: s_mov_b32 s1, exec_lo ; GFX11-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: v_readfirstlane_b32 s4, v4 -; GFX11-NEXT: v_readfirstlane_b32 s5, v5 +; GFX11-NEXT: v_readfirstlane_b32 s4, v10 +; GFX11-NEXT: v_readfirstlane_b32 s5, v11 ; GFX11-NEXT: v_readfirstlane_b32 s6, v12 ; GFX11-NEXT: v_readfirstlane_b32 s7, v13 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5] +; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11] ; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13] ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[17:18], v19, v[14:16], v[20:22]], s[4:7] a16 -; GFX11-NEXT: ; implicit-def: $vgpr4 +; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[17:18], v19, v[14:16], v[4:6]], s[4:7] a16 +; GFX11-NEXT: ; implicit-def: $vgpr10 ; GFX11-NEXT: ; implicit-def: $vgpr17_vgpr18 ; GFX11-NEXT: ; implicit-def: $vgpr19 ; GFX11-NEXT: ; implicit-def: $vgpr14_vgpr15_vgpr16 -; GFX11-NEXT: ; implicit-def: $vgpr20_vgpr21_vgpr22 -; GFX11-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13 +; GFX11-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6 +; GFX11-NEXT: ; implicit-def: $vgpr12_vgpr13 ; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB9_1 ; GFX11-NEXT: ; %bb.2: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll index 08e64da632d3b..5d743b4afcd29 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll @@ -29161,100 +29161,26 @@ define inreg <32 x i32> @bitcast_v64bf16_to_v32i32_scalar(<64 x bfloat> inreg %a ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:156 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:28 -; GFX11-TRUE16-NEXT: s_clause 0x6 ; 28-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v167, v13 :: v_dual_mov_b32 v176, v12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v177, v11 :: v_dual_mov_b32 v178, v10 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v179, v9 :: v_dual_mov_b32 v180, v8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v181, v7 :: v_dual_mov_b32 v182, v6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v183, v5 :: v_dual_mov_b32 v168, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v169, v3 :: v_dual_mov_b32 v170, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v171, v1 :: v_dual_mov_b32 v172, v0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v174, s28 :: v_dual_mov_b32 v173, s29 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v13 :: v_dual_mov_b32 v30, v12 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29 ; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 ; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB19_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v135, s0 :: v_dual_mov_b32 v134, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v132, s2 :: v_dual_mov_b32 v129, s3 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v125, s16 :: v_dual_mov_b32 v120, s17 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v114, s18 :: v_dual_mov_b32 v107, s19 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v99, s20 :: v_dual_mov_b32 v90, s21 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v80, s22 :: v_dual_mov_b32 v69, s23 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v57, s24 :: v_dual_mov_b32 v44, s25 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s26 :: v_dual_mov_b32 v15, s27 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v7, s19 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27 ; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB19_3 ; GFX11-TRUE16-NEXT: .LBB19_2: ; %cmp.true @@ -29265,972 +29191,674 @@ define inreg <32 x i32> @bitcast_v64bf16_to_v32i32_scalar(<64 x bfloat> inreg %a ; GFX11-TRUE16-NEXT: s_and_b32 s4, s26, 0xffff0000 ; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s26, 16 ; GFX11-TRUE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: v_bfe_u32 v4, v1, 16, 1 +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v1, 16, 1 ; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_bfe_u32 v7, v3, 16, 1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v4, v1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v0 ; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, 0x400000, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, v5, v1 +; GFX11-TRUE16-NEXT: v_bfe_u32 v8, v3, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s6 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, 0x400000, v3 ; GFX11-TRUE16-NEXT: s_and_b32 s5, s25, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s25, 16 -; GFX11-TRUE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 0x7fff, v5 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v0 ; GFX11-TRUE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s5 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s24, 0xffff0000 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s25, 16 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v2, v8 :: v_dual_add_nc_u32 v7, v7, v3 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v2, v7, vcc_lo ; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v3 -; GFX11-TRUE16-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v4, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v4, v9, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s7 -; GFX11-TRUE16-NEXT: v_bfe_u32 v9, v6, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v5, v9, vcc_lo ; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, v8, v3 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v6, 16, 1 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v15, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, 0x400000, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x7fff, v7 -; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v4, 16, 1 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s24, 16 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v2, v7, v2 :: v_dual_add_nc_u32 v7, v8, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, v9, v6 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v7 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x7fff, v8 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v6 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v5, v6 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s24, 0xffff0000 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, v7, v10, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v6, v7, v8, vcc_lo +; GFX11-TRUE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v6 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s24, 16 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v7, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v9, v5 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v7 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v14, 16, v1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v3, v7 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v5 ; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc_lo -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v6 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v1.l -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc_lo -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v44, 16, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v44.h, v4.l -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s4 ; GFX11-TRUE16-NEXT: s_and_b32 s4, s23, 0xffff0000 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v57, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v57.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v4, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 ; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s23, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v8, 16, 1 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v13, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 ; GFX11-TRUE16-NEXT: s_and_b32 s4, s22, 0xffff0000 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v5, v8 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v69, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v69.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v9, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v7, v3, 16, 1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v8 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s22, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v9, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v7, v3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v9 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v12, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v5, v9 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v0.l +; GFX11-TRUE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 ; GFX11-TRUE16-NEXT: s_and_b32 s4, s21, 0xffff0000 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v80, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v80.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s21, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s21, 16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v3 +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v6, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v8, v5 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v11, 16, v1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v3, v6 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v5 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s4 ; GFX11-TRUE16-NEXT: s_and_b32 s4, s20, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v90, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v90.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v4, 16, 1 ; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s20, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v6 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v8, 16, 1 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v10, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 ; GFX11-TRUE16-NEXT: s_and_b32 s4, s19, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v5, v8 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v7, v3, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v8 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v99, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v99.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v7, v3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v32 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v32, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 ; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s19, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v9, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v5, v32 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v0.l +; GFX11-TRUE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 ; GFX11-TRUE16-NEXT: s_and_b32 s4, s18, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v107, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v107.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s18, 16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v3 +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v6, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v8, v5 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v8, 16, v1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v3, v6 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v5 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s4 ; GFX11-TRUE16-NEXT: s_and_b32 s4, s17, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v114, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v114.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v6 ; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s17, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s16, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v32, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v4, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v32 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v5, v32 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v4 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v3, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s16, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v33, v3 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v120, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v120.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v34, 16, 1 ; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s16, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v5, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v3 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 ; GFX11-TRUE16-NEXT: s_and_b32 s4, s3, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v125, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v125.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s3 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v33, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v32, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 16 +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v33, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s3 ; GFX11-TRUE16-NEXT: s_and_b32 s3, s2, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v35, v32 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v35, 0x40c00000, s3 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v3, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v32 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v33 +; GFX11-TRUE16-NEXT: v_bfe_u32 v32, v35, 16, 1 ; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v33, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v34 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v32, v35 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s2 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v34 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v129, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v129.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s3 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s2 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 ; GFX11-TRUE16-NEXT: s_and_b32 s2, s1, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v132, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v132.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s2 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s0, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s0, s0, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v134, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v134.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s1 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s0 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v135, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v135.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v167 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v167 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v167, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v167.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v176 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v176 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v176, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v176.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v177 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v177 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v37, 0x40c00000, s2 +; GFX11-TRUE16-NEXT: v_bfe_u32 v36, v32, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v177, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v177.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v178 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v178 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v178, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v178.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v179 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v179 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v179, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v179.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v180 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v180 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v180, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v180.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v181 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v181 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v181, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v181.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v182 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v182 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v182, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v182.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v183 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v183 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v183, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v183.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v168 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v168 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v168, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v168.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v169 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v169 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v169, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v169.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v170 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v170 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v170, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v170.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v171 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v171 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v171, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v171.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v172 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v172 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v172, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v172.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v173 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v173 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v37, 16, 1 +; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 16 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v33, vcc_lo +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v36, v32 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v34, v37 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s1 +; GFX11-TRUE16-NEXT: s_and_b32 s1, s0, 0xffff0000 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v0.l +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v33, 0x400000, v32 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v35, 0x40c00000, s1 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v173, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v173.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v174 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v33, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v32, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-TRUE16-NEXT: s_lshl_b32 s0, s0, 16 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v2, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, v38, v34 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v32, v35 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v33, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v31 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v36 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v36, 0x40c00000, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v33, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v36, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v35, 0x400000, v36 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v0, v37 :: v_dual_add_nc_u32 v33, v33, v36 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v32.l +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v30 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v0 +; GFX11-TRUE16-NEXT: v_bfe_u32 v0, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v30, 16, v30 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v33, v33, v35 :: v_dual_add_nc_u32 v0, v0, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v31, 16, v31 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v33 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v32.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v34, v36, v37 :: v_dual_add_f32 v31, 0x40c00000, v31 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v30, 0x40c00000, v30 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v33, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v31, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v31, v31 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, v38, v31 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v29 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 16, v29 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v36 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v31 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v29, 0x40c00000, v29 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v31, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v30, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v30 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v34, v30 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_and_b32 v37, 0xffff0000, v28 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.h, v33.l +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v30, v30 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v28, 16, v28 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v30, v34, v36 :: v_dual_add_nc_u32 v33, v33, v35 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v29, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v30 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_dual_add_f32 v28, 0x40c00000, v28 :: v_dual_add_nc_u32 v35, v37, v29 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v32.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v33, v33, v36 :: v_dual_add_nc_u32 v32, v38, v34 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v29 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v29, v29 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v27 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v29, v35, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v28, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v28 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v29 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_add_nc_u32 v35, v35, v28 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v26 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v26, 16, v26 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 16, v27 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v33.l +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v28, v28 +; GFX11-TRUE16-NEXT: v_dual_add_f32 v26, 0x40c00000, v26 :: v_dual_add_f32 v27, 0x40c00000, v27 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v33, v34 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v28, v35, v36 :: v_dual_add_f32 v35, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v27, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v37, v27 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v28 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v32.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v27 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v27, v27 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v25 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v27, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v26, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v26 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v34, v26 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v27 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_and_b32 v37, 0xffff0000, v24 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v24, 16, v24 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 16, v25 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v33.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v26, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_dual_add_f32 v24, 0x40c00000, v24 :: v_dual_add_f32 v25, 0x40c00000, v25 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v26 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v32.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v25, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v34 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v23 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v23, 16, v23 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v33, v35 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v37, v25 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v23, 0x40c00000, v23 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v25 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v25, v35, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v24, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v24 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v25 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v33.l +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_and_b32 v37, 0xffff0000, v22 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v22, 16, v22 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v35, v24 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v22, 0x40c00000, v22 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v174 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v24, v35, v36 :: v_dual_add_nc_u32 v33, v33, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v23, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v37, v23 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v24 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v32.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v23 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v23, v23 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v21 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v23, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v22, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v22 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v34, v22 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v23 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_and_b32 v37, 0xffff0000, v20 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v22, v22 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v33.l +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v20, 16, v20 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v22, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v33, v35 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v22, 16, v22 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v32.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v20, 0x40c00000, v20 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v34 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v19 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v21, 16, v21 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v19, 16, v19 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_f32_e32 v21, 0x40c00000, v21 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_add_f32_e32 v19, 0x40c00000, v19 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v21, 16, 1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v21 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v21, v21 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v37, v21 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v174, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v174.h, v0.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v21, v35, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v20, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v20 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v21, 16, v21 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_add_nc_u32 v35, v35, v20 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v33.l +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v18 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v20, v20 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v18, 16, v18 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v33, v34 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v20, v35, v36 :: v_dual_add_f32 v35, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v19, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v37, v19 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v18, 0x40c00000, v18 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v20, 16, v20 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v19 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v32.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v35 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v17 +; GFX11-TRUE16-NEXT: v_bfe_u32 v39, v18, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v19, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v38, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v39, v18 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v19, 16, v19 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v33.l +; GFX11-TRUE16-NEXT: v_bfe_u32 v36, v34, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v38 :: v_dual_add_nc_u32 v33, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v35, 0x400000, v18 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 16, v17 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v16 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v18, v18 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, v36, v34 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_add_f32 v17, 0x40c00000, v17 :: v_dual_lshlrev_b32 v16, 16, v16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v18, v33, v35 :: v_dual_add_f32 v33, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v36 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v17, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v33, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v16, 0x40c00000, v16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v39, 0x400000, v17 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v18, 16, v18 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v34, v35, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, v37, v17 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v37, v38, v33 +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v16, 16, 1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v38, 0x400000, v33 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v37, 0x7fff, v37 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v35, v16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v48, 0x400000, v16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v32.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v37, v38, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v17, v17 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v17, v36, v39, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v16, v16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v17, 16, v17 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v16, v35, v48, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v34.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v16, 16, v16 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v33.l ; GFX11-TRUE16-NEXT: .LBB19_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, v125 :: v_dual_mov_b32 v5, v120 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, v114 :: v_dual_mov_b32 v7, v107 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, v99 :: v_dual_mov_b32 v9, v90 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, v57 :: v_dual_mov_b32 v13, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v30 :: v_dual_mov_b32 v17, v173 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, v174 :: v_dual_mov_b32 v19, v171 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v172 :: v_dual_mov_b32 v21, v169 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, v170 :: v_dual_mov_b32 v23, v183 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v22, v168 :: v_dual_mov_b32 v25, v181 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0x6 ; 28-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:280 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, v135 :: v_dual_mov_b32 v1, v134 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, v132 :: v_dual_mov_b32 v3, v129 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, v80 :: v_dual_mov_b32 v11, v69 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, v182 :: v_dual_mov_b32 v27, v179 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v180 :: v_dual_mov_b32 v29, v177 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v178 :: v_dual_mov_b32 v31, v167 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v30, v176 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB19_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 ; GFX11-TRUE16-NEXT: s_branch .LBB19_2 ; ; GFX11-FAKE16-LABEL: bitcast_v64bf16_to_v32i32_scalar: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 -; GFX11-FAKE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:288 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:284 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:280 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:276 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:272 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:268 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:264 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:260 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:256 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:252 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:248 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:244 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:240 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:236 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:232 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:228 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:224 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:220 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:216 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:212 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:208 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:204 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:200 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:196 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:192 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:188 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:184 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:180 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:176 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:172 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:168 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:164 -; GFX11-FAKE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:160 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:156 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:152 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:148 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:144 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:140 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:136 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:132 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:128 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:124 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:120 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:116 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:112 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:108 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:104 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:100 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:96 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:92 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:88 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v139, s32 offset:84 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v140, s32 offset:80 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v141, s32 offset:76 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v142, s32 offset:72 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v143, s32 offset:68 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v152, s32 offset:64 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v153, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v154, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v155, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v156, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v157, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v158, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v159, s32 offset:36 -; GFX11-FAKE16-NEXT: s_clause 0x8 ; 36-byte Folded Spill -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v168, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v169, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v170, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v171, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v172, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v173, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v174, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v175, s32 offset:4 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v184, s32 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v178, v13 :: v_dual_mov_b32 v179, v12 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v180, v11 :: v_dual_mov_b32 v181, v9 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v182, v10 :: v_dual_mov_b32 v169, v7 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v170, v8 :: v_dual_mov_b32 v177, v3 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v176, v6 :: v_dual_mov_b32 v171, v4 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v174, v5 :: v_dual_mov_b32 v173, v0 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v184, v2 :: v_dual_mov_b32 v175, v1 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v183, s28 :: v_dual_mov_b32 v172, s29 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v31, v13 :: v_dual_mov_b32 v30, v12 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29 ; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 ; GFX11-FAKE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo ; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB19_4 ; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, s0 :: v_dual_mov_b32 v37, s2 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, s1 :: v_dual_mov_b32 v41, s3 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v46, s16 :: v_dual_mov_b32 v59, s18 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v52, s17 :: v_dual_mov_b32 v67, s19 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v76, s20 :: v_dual_mov_b32 v97, s22 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v86, s21 :: v_dual_mov_b32 v109, s23 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v122, s24 :: v_dual_mov_b32 v151, s26 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v136, s25 :: v_dual_mov_b32 v15, s27 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v7, s19 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27 ; GFX11-FAKE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB19_3 ; GFX11-FAKE16-NEXT: .LBB19_2: ; %cmp.true @@ -30238,762 +29866,674 @@ define inreg <32 x i32> @bitcast_v64bf16_to_v32i32_scalar(<64 x bfloat> inreg %a ; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s27, 16 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s5 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s26, 16 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s26, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s6 +; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s26, 16 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 ; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v1, 16, 1 ; GFX11-FAKE16-NEXT: v_bfe_u32 v2, v0, 16, 1 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s6 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, 0x400000, v1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, 0x400000, v0 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v9, v3, 16, 1 ; GFX11-FAKE16-NEXT: v_bfe_u32 v10, v5, 16, 1 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-FAKE16-NEXT: v_bfe_u32 v9, v3, 16, 1 -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s25, 16 +; GFX11-FAKE16-NEXT: s_and_b32 s5, s25, 0xffff0000 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x7fff, v4 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, v2, v0 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s25, 0xffff0000 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s24, 0xffff0000 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v51, 0xffff0000, v183 +; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s25, 16 +; GFX11-FAKE16-NEXT: s_and_b32 s4, s24, 0xffff0000 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s24, 16 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v8, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, v9, v3 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, v10, v5 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, v9, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v3 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: v_bfe_u32 v10, v6, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v4, v7, vcc_lo +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v1, v4, v7 :: v_dual_add_nc_u32 v2, 0x7fff, v2 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v5 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x7fff, v8 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 +; GFX11-FAKE16-NEXT: v_bfe_u32 v5, v6, 16, 1 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 -; GFX11-FAKE16-NEXT: v_bfe_u32 v3, v8, 16, 1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v15, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, v3, v8 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v10, v6 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v8, 16, 1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v15, v1, 16, v0 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v4 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v5, v6 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v4, v7, v9, vcc_lo -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v7, v8 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s23, 16 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v14, v0, 16, v1 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s24, 16 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v6 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, 0x400000, v8 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v9, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_bfe_u32 v8, v7, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s23, 0xffff0000 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo ; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, 0x400000, v9 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v8, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v6, v1, v7 :: v_dual_and_b32 v1, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v9, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v3 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x7fff, v4 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v6 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v7, v9 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s23, 16 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v151, v0, 16, v1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v12, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x7fff, v6 -; GFX11-FAKE16-NEXT: v_bfe_u32 v11, v7, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v5, v4, v8, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s22, 0xffff0000 -; GFX11-FAKE16-NEXT: v_bfe_u32 v9, v12, 16, 1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, v11, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xffff, v3 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc_lo -; GFX11-FAKE16-NEXT: v_add_f32_e64 v10, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s22, 16 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v5 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v11, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x7fff, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, v9, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v7 -; GFX11-FAKE16-NEXT: v_bfe_u32 v14, v10, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, 0x400000, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 0x7fff, v8 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s22, 16 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v6 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v8, 16, 1 +; GFX11-FAKE16-NEXT: s_and_b32 s4, s22, 0xffff0000 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v6, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v7, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v6 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v13, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_bfe_u32 v8, v7, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s21, 0xffff0000 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v7, v6, v9, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v9, v11, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v12, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, v14, v10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff, v5 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v7 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v8, v8, v13 :: v_dual_add_nc_u32 v7, v9, v11 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 0x7fff, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, 0x400000, v10 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v13, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s21, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x7fff, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, 0x400000, v11 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v16, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v10, v13, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v8 -; GFX11-FAKE16-NEXT: v_bfe_u32 v12, v16, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v8, v7 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s21, 16 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v12, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v6 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v8, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s20, 0xffff0000 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v11, v7, v14 :: v_dual_add_nc_u32 v10, v10, v13 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff, v8 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v9 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, 0x400000, v13 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 0x7fff, v10 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, v12, v16 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v12, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v13, v13 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s20, 16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, 0x400000, v16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x7fff, v11 -; GFX11-FAKE16-NEXT: v_bfe_u32 v18, v12, 16, 1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v19, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v13, v10, v14, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v16, v16 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s20, 16 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v6, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v7, v8 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v11, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v6 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_bfe_u32 v8, v7, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s19, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, v18, v12 -; GFX11-FAKE16-NEXT: v_bfe_u32 v16, v19, 16, 1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xffff, v9 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v11, v11, v17, vcc_lo -; GFX11-FAKE16-NEXT: v_add_f32_e64 v17, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s19, 16 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v13 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 0x7fff, v14 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, v16, v19 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, 0x400000, v12 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v18, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_bfe_u32 v21, v17, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v12, v12 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v11, 16, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, 0x7fff, v14 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v20, 0x400000, v19 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v8, v7 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s19, 16 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v10, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v6 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v8, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s18, 0xffff0000 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v13, v13, v16, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v16, v18, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v19, v21, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v11, 16, v13 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v14, v14, v20 :: v_dual_add_nc_u32 v13, v16, v18 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 0x7fff, v19 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v19, 0x400000, v17 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v20, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v17, v17 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s18, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 0x7fff, v13 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v21, 0x400000, v18 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v22, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v16, v16, v19, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v17, v20, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v18, v18 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v14 -; GFX11-FAKE16-NEXT: v_bfe_u32 v19, v22, 16, 1 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s18, 16 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v6, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v7, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v6 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v9, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v32, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_bfe_u32 v8, v7, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s17, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, v17, v20 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v18, v13, v21 :: v_dual_and_b32 v13, 0xffff, v14 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v21, 0x400000, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x7fff, v17 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v16, 16, v18 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, v19, v22 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v19, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v20, v20 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s17, 16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v23, 0x400000, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, 0x7fff, v18 -; GFX11-FAKE16-NEXT: v_bfe_u32 v24, v19, 16, 1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v25, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v20, v17, v21, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v22, v22 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s17, 16 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v33, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v32, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v8, v7 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v8, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v6 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s5 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s16, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, v24, v19 -; GFX11-FAKE16-NEXT: v_bfe_u32 v22, v25, 16, 1 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v18, v18, v23 :: v_dual_and_b32 v17, 0xffff, v16 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v23, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s16, 16 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v16, 16, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 0x7fff, v21 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, v22, v25 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, 0x400000, v19 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v24, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_bfe_u32 v27, v23, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v18, 16, v18 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, 0x7fff, v21 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v26, 0x400000, v25 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s16, 16 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v6, 16, 1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v32, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v34, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v6 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v2, v2, v33 :: v_dual_add_nc_u32 v5, v7, v32 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v7, v0, 16, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v32 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v33, 0x40c00000, s5 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s3, 0xffff0000 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v20, v20, v22, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v22, v24, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v25, v27, v23 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v18 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v18, 16, v20 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v21, v21, v26 :: v_dual_add_nc_u32 v20, v22, v24 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x7fff, v25 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v25, 0x400000, v23 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v26, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v23, v23 ; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 0x7fff, v20 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, 0x400000, v24 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v28, 0x40c00000, s3 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v22, v22, v25, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v23, v26, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v21, 16, v21 -; GFX11-FAKE16-NEXT: v_bfe_u32 v25, v28, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v33, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v6, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v34, vcc_lo +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s3 ; GFX11-FAKE16-NEXT: s_and_b32 s3, s2, 0xffff0000 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v24, v20, v27 :: v_dual_add_nc_u32 v23, v23, v26 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v20, 0xffff, v21 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v21, 16, v22 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, 0x400000, v26 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v23, 0x7fff, v23 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v22, 16, v24 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v24, v25, v28 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v25, 0x40c00000, s3 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v32, v33 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v34, 16, 1 ; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v29, 0x400000, v28 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v24, 0x7fff, v24 -; GFX11-FAKE16-NEXT: v_bfe_u32 v30, v25, 16, 1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v31, 0x40c00000, s2 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v26, v23, v27, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v28, v28 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v33, v34 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v32, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v5, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v32 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v32 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v35, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e64 v35, 0x40c00000, s2 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v34 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v33, 0x40c00000, s3 +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s2, s1, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, v30, v25 -; GFX11-FAKE16-NEXT: v_bfe_u32 v28, v31, 16, 1 ; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v24, v24, v29, vcc_lo -; GFX11-FAKE16-NEXT: v_add_f32_e64 v29, 0x40c00000, s2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v23, 0xffff, v22 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v22, 16, v26 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v26, 0x7fff, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, v28, v31 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v28, 0x400000, v25 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v30, 0x40c00000, s1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v29, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v24, 16, v24 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 0x7fff, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v32, 0x400000, v31 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s0, 0xffff0000 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v26, v26, v28, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v28, v30, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v31, v31 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v33, v29 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v25, 0xffff, v24 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v24, 16, v26 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v27, v27, v32 :: v_dual_add_nc_u32 v26, v28, v30 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v33, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v35 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v33 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, 0x7fff, v31 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v31, 0x400000, v29 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v29, v29 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v4, v33 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v36, vcc_lo +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v4, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v36, 0x40c00000, s1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v32, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s2 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v36, 16, 1 +; GFX11-FAKE16-NEXT: s_and_b32 s1, s0, 0xffff0000 ; GFX11-FAKE16-NEXT: s_lshl_b32 s0, s0, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v26, 0x7fff, v26 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v33, 0x400000, v30 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s0 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v28, v28, v31, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v29, v32, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v30, v30 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v31, v34, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v27, 16, v27 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v30, v26, v33, vcc_lo -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v26, 16, v28 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, v29, v32 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v33, 16, v178 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v30 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v30, v31, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff0000, v178 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, 0x7fff, v28 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v33, 0x40c00000, v33 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v109, v5, 16, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v30, 0x7fff, v30 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v31, 0x40c00000, v31 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v28, v35, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v33, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v32, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v34, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v36 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v38, 0x40c00000, s0 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v32, v34 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v3, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v33 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v33, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v35 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v35, 0x40c00000, s1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v38, 16, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v32, v33, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v28, 0xffff, v29 +; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v2, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v2, v0, 16, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, v33, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v36, v38 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v38 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v34 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff0000, v31 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v31, 16, v31 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v32 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_dual_add_f32 v34, 0x40c00000, v34 :: v_dual_add_f32 v31, 0x40c00000, v31 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v34, 16, 1 ; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v31, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v37, v33 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v30, v30, v36, vcc_lo -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v36, 16, v179 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v35, v31 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v179 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v31 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v1, v1, 16, v32 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v36, v34 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v31 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v30 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v30, 16, v30 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v0, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v31 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 ; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v32, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e32 v30, 0x40c00000, v30 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v31, v31 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v180 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v31, v34, v38, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v36, 16, 1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v38, 16, v180 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_add_f32 v37, 0x40c00000, v37 :: v_dual_add_nc_u32 v34, v34, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v33, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v38 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v33 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v34 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v31, v35, v37 :: v_dual_add_nc_u32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v30, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, v33 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v35, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v29 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v30 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v32, v32, v38 :: v_dual_lshlrev_b32 v29, 16, v29 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v30 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_f32_e32 v29, 0x40c00000, v29 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v30, v30 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v37, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v30, v34, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v29, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v31, v32, 16, v31 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v30, 16, v30 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v178, v31, 16, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v36, v37 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v33, v33, v48 :: v_dual_lshlrev_b32 v36, 16, v182 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v33, v33, v38 :: v_dual_add_nc_u32 v32, v34, v35 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v35, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v182 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v34 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x7fff, v31 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v179, v32, 16, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v30, 0xffff, v30 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v136, v2, 16, v4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v36, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v36, v29 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v28 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v28 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v29 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v29, v29 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v31, v31, v48 :: v_dual_add_nc_u32 v38, v38, v35 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v37, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v181 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v38 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v38, 16, v181 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v37, 0x40c00000, v37 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v38 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v32, v39, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e32 v28, 0x40c00000, v28 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v30, 0xffff, v30 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v29, v34, v37, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v33 -; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v37, 16, 1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v48, vcc_lo -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v180, v31, 16, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v35, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v170 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v36, v38 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_add_f32 v35, 0x40c00000, v35 :: v_dual_lshlrev_b32 v36, 16, v170 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v28, 16, 1 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v30, v33, 16, v30 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v28 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v32, v32, v38 :: v_dual_add_nc_u32 v33, v35, v28 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v27 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v27 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v36, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v182, v31, 16, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, v38, v35 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v39, v36 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v48, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v28, v28 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v29 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v27, 0x40c00000, v27 :: v_dual_cndmask_b32 v28, v33, v37 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x7fff, v31 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v29, 0xffff, v29 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v27, 16, 1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v28, 16, v28 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v34, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v29, v32, 16, v29 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v27 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v27, v27 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v169 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v31, v31, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v34, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v36, v27 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v26 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v26 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v28, 0xffff, v28 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v26, 0x40c00000, v26 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v28, v32, 16, v28 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v27, v34, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v26, 16, 1 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v39, 16, v169 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v181, v32, 16, v33 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v37, 0x40c00000, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v176 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v48, vcc_lo -; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v39 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 -; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v37, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v34 -; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v36, 16, 1 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v38 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v35, v37 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v35, 16, v176 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v32, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v25 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v27, 16, v27 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v36 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v37, v26 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v26 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v49, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v25 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v26, v32, v37, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v27, 0xffff, v27 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v170, v33, 16, v31 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v49, v35 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v32, v32, v48 :: v_dual_add_nc_u32 v33, v37, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v174 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v25, 0x40c00000, v25 :: v_dual_lshlrev_b32 v36, 16, v24 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff0000, v24 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v34, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v27, v33, 16, v27 +; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v25, 16, 1 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v35 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v33, v25 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v26, 16, v26 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v24, 0x40c00000, v24 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v25 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v36, 16, 1 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v26, 0xffff, v26 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v24, 16, 1 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v23 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v23 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v25, v33, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v39, v36 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v26, v32, 16, v26 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v34 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v35, v24 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v35, 0x40c00000, v37 :: v_dual_cndmask_b32 v34, v34, v36 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v36, 16, v174 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v32 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v36, 0x40c00000, v36 :: v_dual_cndmask_b32 v33, v33, v39 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v169, v31, 16, v32 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, v37, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v34 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v31, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v25, 16, v25 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v24 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v23, 0x40c00000, v23 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v35, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v37 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v25, 0xffff, v25 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v22 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff0000, v171 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v22 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v24, v34, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v25, v32, 16, v25 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v33 +; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v23, 16, 1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v24, 16, v24 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v35 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v38, 16, v177 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v31, v36 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v176, v33, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v37 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v32, v23 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v24, v24, 16, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v34 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v34, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v32, 0x40c00000, v32 :: v_dual_lshlrev_b32 v37, 16, v171 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x7fff, v31 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v38 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v22, 0x40c00000, v22 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v36 ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v34, vcc_lo -; GFX11-FAKE16-NEXT: v_or_b32_e32 v34, 0x400000, v36 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v37 -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v32, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v50, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v31, v31, v34 :: v_dual_add_nc_u32 v36, v37, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v177 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v37, 0x40c00000, v37 :: v_dual_add_nc_u32 v34, v34, v35 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v34, 0x400000, v23 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v23, v23 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v22 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v36, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v23, v32, v34 :: v_dual_add_nc_u32 v34, v35, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v21 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 16, v21 +; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v22, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 -; GFX11-FAKE16-NEXT: v_bfe_u32 v49, v37, 16, 1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v50, v38 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v50, 16, v184 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_dual_add_f32 v21, 0x40c00000, v21 :: v_dual_add_nc_u32 v32, v32, v22 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v23, 16, v23 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v37, vcc_lo ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v34, v34, v48 :: v_dual_add_nc_u32 v35, v49, v37 +; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v48, v21, 16, 1 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v22, v22 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v21 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v23, 0xffff, v23 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v48, v21 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v22, v32, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v39, v35 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v20 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v48, 0xffff0000, v184 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v20, 16, v20 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v21, v21 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v22, 16, v22 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v20, 0x40c00000, v20 :: v_dual_cndmask_b32 v21, v36, v37 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v38 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xffff, v22 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v49, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v36, v36, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v37, 0x40c00000, v50 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 +; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v20, 16, 1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v21, 16, v21 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v32, v39, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v23, v33, 16, v23 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v22, v34, 16, v22 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v21, 0xffff, v21 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v36 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v38, 0x40c00000, v48 :: v_dual_cndmask_b32 v35, v35, v49 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v36, 16, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v34 -; GFX11-FAKE16-NEXT: v_bfe_u32 v48, v37, 16, 1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v35, 16, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff, v36 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v174, v33, 16, v31 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v171, v32, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v48, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff0000, v175 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v34, 16, v175 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v39, v39, v38 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v177, v35, 16, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x7fff, v31 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v33, 0x40c00000, v33 :: v_dual_add_f32 v34, 0x40c00000, v34 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v39 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v38 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v37, v20 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff0000, v19 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v19, 16, v19 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v20, v20 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v21, v32, 16, v21 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v20 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v19, 0x40c00000, v19 :: v_dual_add_f32 v34, 0x40c00000, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v36 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v20, v33, v35 :: v_dual_and_b32 v33, 0xffff0000, v18 +; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v19, 16, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v34, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v33, 0x40c00000, v33 :: v_dual_lshlrev_b32 v18, 16, v18 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v38, v19 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_add_nc_u32 v35, v35, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v19 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v33, 16, 1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v34, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v31, v31, v35, vcc_lo -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v173 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v48, 16, v173 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v49, 0x400000, v33 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v35, 0x40c00000, v35 :: v_dual_cndmask_b32 v32, v32, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v37, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, v39, v34 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v34 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v18, 0x40c00000, v18 :: v_dual_add_nc_u32 v37, v37, v33 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v19, v36, v38, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, 0x7fff, v37 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v38, v38, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v122, v3, 16, v6 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v37, v39, vcc_lo +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v20, 16, v20 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v37 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v33 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v18 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v35, v39, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v18, 16, 1 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, 0x7fff, v38 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v48 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v33, v36, v49 :: v_dual_lshlrev_b32 v48, 16, v183 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v49, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v48, 0x40c00000, v48 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v35, v37, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v172 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v39, 16, v172 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v36, v38 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v55, 0x400000, v48 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v19, 16, v19 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v20, 0xffff, v20 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v18 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v36, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v17 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 16, v17 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v37, 16, v16 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v18, v18 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_dual_add_f32 v36, 0x40c00000, v36 :: v_dual_add_f32 v17, 0x40c00000, v17 ; GFX11-FAKE16-NEXT: v_add_f32_e32 v37, 0x40c00000, v37 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v39, 0x40c00000, v39 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v34 -; GFX11-FAKE16-NEXT: v_bfe_u32 v50, v37, 16, 1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v39, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v36, v36, v49, vcc_lo -; GFX11-FAKE16-NEXT: v_or_b32_e32 v54, 0x400000, v39 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v39, v39 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v50, 0x40c00000, v51 :: v_dual_add_nc_u32 v49, v50, v37 -; GFX11-FAKE16-NEXT: v_bfe_u32 v51, v48, 16, 1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v38, v38, v39 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v53, 0x400000, v37 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xffff0000, v16 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v18, v35, v38, vcc_lo ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v49, 0x7fff, v49 -; GFX11-FAKE16-NEXT: v_bfe_u32 v52, v50, 16, 1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v51, v51, v48 +; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v17, 16, 1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v50, 0x400000, v17 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v16, 0x40c00000, v16 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v17, v17 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v38, v39, v36 +; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v37, 16, 1 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v17 +; GFX11-FAKE16-NEXT: v_bfe_u32 v48, v16, 16, 1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v51, 0x400000, v37 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v38, 0x7fff, v38 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v36, 16, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v35, 16, v35 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v52, v52, v50 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v38, v38, v54 :: v_dual_add_nc_u32 v51, 0x7fff, v51 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v48, v48 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v39, 0x7fff, v52 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v52, 0x400000, v50 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v38, 16, v38 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v48, v51, v55, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v39, v39, v37 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v49, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v48, v48, v16 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v18, 16, v18 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v39, 0x7fff, v39 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v17, v35, v50, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v184, v32, 16, v31 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v175, v33, 16, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v38, 0xffff, v38 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v48, 16, v48 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v37, v49, v53, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v50, v50 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v173, v35, 16, v36 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v97, v8, 16, v10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v48, 0xffff, v48 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v37, 16, v37 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v39, v39, v52, vcc_lo -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v86, v9, 16, v12 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v76, v11, 16, v13 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v67, v14, 16, v17 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v172, v37, 16, v38 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v39, 16, v39 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v59, v16, 16, v19 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v52, v18, 16, v20 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v46, v21, 16, v23 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v41, v22, 16, v25 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v183, v39, 16, v48 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v37, v24, 16, v27 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v34, v26, 16, v28 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v32, v29, 16, v30 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v48 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v16 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v17, 16, v17 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v37, v39, v51, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v19 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v18, 0xffff, v18 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xffff, v17 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v36, v38, v49, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v16, v16 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v20, v32, 16, v20 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v19, v34, 16, v19 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v18, v33, 16, v18 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v36, 16, v36 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v16, v35, v48, vcc_lo +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v35, 16, v37 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v17, v36, 16, v17 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v16, 16, v16 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff, v35 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v16, v16, 16, v35 ; GFX11-FAKE16-NEXT: .LBB19_3: ; %end -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v3, v41 :: v_dual_mov_b32 v4, v46 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v6, v59 :: v_dual_mov_b32 v9, v86 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v7, v67 :: v_dual_mov_b32 v8, v76 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v10, v97 :: v_dual_mov_b32 v13, v136 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v11, v109 :: v_dual_mov_b32 v12, v122 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v14, v151 :: v_dual_mov_b32 v17, v172 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v18, v173 :: v_dual_mov_b32 v19, v175 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v20, v184 :: v_dual_mov_b32 v23, v174 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v22, v171 :: v_dual_mov_b32 v25, v169 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v26, v170 :: v_dual_mov_b32 v29, v180 -; GFX11-FAKE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-FAKE16-NEXT: scratch_load_b32 v184, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_b32 v175, off, s32 offset:4 -; GFX11-FAKE16-NEXT: scratch_load_b32 v174, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_b32 v173, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_b32 v172, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_b32 v171, off, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_load_b32 v170, off, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_load_b32 v169, off, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_load_b32 v168, off, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_load_b32 v159, off, s32 offset:36 -; GFX11-FAKE16-NEXT: scratch_load_b32 v158, off, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_load_b32 v157, off, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_load_b32 v156, off, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_load_b32 v155, off, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_load_b32 v154, off, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_load_b32 v153, off, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_load_b32 v152, off, s32 offset:64 -; GFX11-FAKE16-NEXT: scratch_load_b32 v143, off, s32 offset:68 -; GFX11-FAKE16-NEXT: scratch_load_b32 v142, off, s32 offset:72 -; GFX11-FAKE16-NEXT: scratch_load_b32 v141, off, s32 offset:76 -; GFX11-FAKE16-NEXT: scratch_load_b32 v140, off, s32 offset:80 -; GFX11-FAKE16-NEXT: scratch_load_b32 v139, off, s32 offset:84 -; GFX11-FAKE16-NEXT: scratch_load_b32 v138, off, s32 offset:88 -; GFX11-FAKE16-NEXT: scratch_load_b32 v137, off, s32 offset:92 -; GFX11-FAKE16-NEXT: scratch_load_b32 v136, off, s32 offset:96 -; GFX11-FAKE16-NEXT: scratch_load_b32 v127, off, s32 offset:100 -; GFX11-FAKE16-NEXT: scratch_load_b32 v126, off, s32 offset:104 -; GFX11-FAKE16-NEXT: scratch_load_b32 v125, off, s32 offset:108 -; GFX11-FAKE16-NEXT: scratch_load_b32 v124, off, s32 offset:112 -; GFX11-FAKE16-NEXT: scratch_load_b32 v123, off, s32 offset:116 -; GFX11-FAKE16-NEXT: scratch_load_b32 v122, off, s32 offset:120 -; GFX11-FAKE16-NEXT: scratch_load_b32 v121, off, s32 offset:124 -; GFX11-FAKE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-FAKE16-NEXT: scratch_load_b32 v120, off, s32 offset:128 -; GFX11-FAKE16-NEXT: scratch_load_b32 v111, off, s32 offset:132 -; GFX11-FAKE16-NEXT: scratch_load_b32 v110, off, s32 offset:136 -; GFX11-FAKE16-NEXT: scratch_load_b32 v109, off, s32 offset:140 -; GFX11-FAKE16-NEXT: scratch_load_b32 v108, off, s32 offset:144 -; GFX11-FAKE16-NEXT: scratch_load_b32 v107, off, s32 offset:148 -; GFX11-FAKE16-NEXT: scratch_load_b32 v106, off, s32 offset:152 -; GFX11-FAKE16-NEXT: scratch_load_b32 v105, off, s32 offset:156 -; GFX11-FAKE16-NEXT: scratch_load_b32 v104, off, s32 offset:160 -; GFX11-FAKE16-NEXT: scratch_load_b32 v95, off, s32 offset:164 -; GFX11-FAKE16-NEXT: scratch_load_b32 v94, off, s32 offset:168 -; GFX11-FAKE16-NEXT: scratch_load_b32 v93, off, s32 offset:172 -; GFX11-FAKE16-NEXT: scratch_load_b32 v92, off, s32 offset:176 -; GFX11-FAKE16-NEXT: scratch_load_b32 v91, off, s32 offset:180 -; GFX11-FAKE16-NEXT: scratch_load_b32 v90, off, s32 offset:184 -; GFX11-FAKE16-NEXT: scratch_load_b32 v89, off, s32 offset:188 -; GFX11-FAKE16-NEXT: scratch_load_b32 v88, off, s32 offset:192 -; GFX11-FAKE16-NEXT: scratch_load_b32 v79, off, s32 offset:196 -; GFX11-FAKE16-NEXT: scratch_load_b32 v78, off, s32 offset:200 -; GFX11-FAKE16-NEXT: scratch_load_b32 v77, off, s32 offset:204 -; GFX11-FAKE16-NEXT: scratch_load_b32 v76, off, s32 offset:208 -; GFX11-FAKE16-NEXT: scratch_load_b32 v75, off, s32 offset:212 -; GFX11-FAKE16-NEXT: scratch_load_b32 v74, off, s32 offset:216 -; GFX11-FAKE16-NEXT: scratch_load_b32 v73, off, s32 offset:220 -; GFX11-FAKE16-NEXT: scratch_load_b32 v72, off, s32 offset:224 -; GFX11-FAKE16-NEXT: scratch_load_b32 v63, off, s32 offset:228 -; GFX11-FAKE16-NEXT: scratch_load_b32 v62, off, s32 offset:232 -; GFX11-FAKE16-NEXT: scratch_load_b32 v61, off, s32 offset:236 -; GFX11-FAKE16-NEXT: scratch_load_b32 v60, off, s32 offset:240 -; GFX11-FAKE16-NEXT: scratch_load_b32 v59, off, s32 offset:244 -; GFX11-FAKE16-NEXT: scratch_load_b32 v58, off, s32 offset:248 -; GFX11-FAKE16-NEXT: scratch_load_b32 v57, off, s32 offset:252 -; GFX11-FAKE16-NEXT: s_clause 0x8 ; 36-byte Folded Reload -; GFX11-FAKE16-NEXT: scratch_load_b32 v56, off, s32 offset:256 -; GFX11-FAKE16-NEXT: scratch_load_b32 v47, off, s32 offset:260 -; GFX11-FAKE16-NEXT: scratch_load_b32 v46, off, s32 offset:264 -; GFX11-FAKE16-NEXT: scratch_load_b32 v45, off, s32 offset:268 -; GFX11-FAKE16-NEXT: scratch_load_b32 v44, off, s32 offset:272 -; GFX11-FAKE16-NEXT: scratch_load_b32 v43, off, s32 offset:276 -; GFX11-FAKE16-NEXT: scratch_load_b32 v42, off, s32 offset:280 -; GFX11-FAKE16-NEXT: scratch_load_b32 v41, off, s32 offset:284 -; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s32 offset:288 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, v32 :: v_dual_mov_b32 v1, v34 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, v37 :: v_dual_mov_b32 v5, v52 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v16, v183 :: v_dual_mov_b32 v21, v177 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v24, v176 :: v_dual_mov_b32 v27, v181 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v28, v182 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v30, v179 :: v_dual_mov_b32 v31, v178 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-FAKE16-NEXT: .LBB19_4: -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr15 ; GFX11-FAKE16-NEXT: s_branch .LBB19_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -34712,252 +34252,80 @@ define inreg <32 x i32> @bitcast_v64f16_to_v32i32_scalar(<64 x half> inreg %a, i ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 -; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 -; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 -; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 -; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 -; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 -; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 -; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 -; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 -; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 -; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 -; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 -; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 -; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 -; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 -; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 -; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 -; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 -; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 -; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 -; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 -; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 -; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 -; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 -; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 -; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 -; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 -; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 -; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 -; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 -; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 -; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 -; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 -; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 -; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 -; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 -; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 -; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 -; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 -; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 -; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 -; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 -; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 -; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 -; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 -; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 -; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 -; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 -; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 -; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 -; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 -; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 -; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 -; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 -; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 -; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 -; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 -; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 -; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 -; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 -; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 -; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 -; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 -; GFX11-NEXT: s_clause 0x9 ; 40-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 -; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 -; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 -; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 -; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 -; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 -; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 -; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 -; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 -; GFX11-NEXT: scratch_store_b32 off, v185, s32 -; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 -; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 -; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 -; GFX11-NEXT: v_dual_mov_b32 v182, v7 :: v_dual_mov_b32 v183, v6 -; GFX11-NEXT: v_dual_mov_b32 v170, v5 :: v_dual_mov_b32 v171, v4 -; GFX11-NEXT: v_dual_mov_b32 v172, v3 :: v_dual_mov_b32 v173, v2 -; GFX11-NEXT: v_dual_mov_b32 v174, v1 :: v_dual_mov_b32 v175, v0 -; GFX11-NEXT: v_dual_mov_b32 v184, s28 :: v_dual_mov_b32 v185, s29 +; GFX11-NEXT: v_dual_mov_b32 v31, v13 :: v_dual_mov_b32 v30, v12 +; GFX11-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 +; GFX11-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29 ; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo ; GFX11-NEXT: s_cbranch_scc0 .LBB23_4 ; GFX11-NEXT: ; %bb.1: ; %cmp.false -; GFX11-NEXT: v_dual_mov_b32 v47, s0 :: v_dual_mov_b32 v52, s2 -; GFX11-NEXT: v_dual_mov_b32 v49, s1 :: v_dual_mov_b32 v56, s3 -; GFX11-NEXT: v_dual_mov_b32 v61, s16 :: v_dual_mov_b32 v74, s18 -; GFX11-NEXT: v_dual_mov_b32 v67, s17 :: v_dual_mov_b32 v82, s19 -; GFX11-NEXT: v_dual_mov_b32 v91, s20 :: v_dual_mov_b32 v112, s22 -; GFX11-NEXT: v_dual_mov_b32 v101, s21 :: v_dual_mov_b32 v124, s23 -; GFX11-NEXT: v_dual_mov_b32 v137, s24 :: v_dual_mov_b32 v14, s26 -; GFX11-NEXT: v_dual_mov_b32 v151, s25 :: v_dual_mov_b32 v30, s27 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17 +; GFX11-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v7, s19 +; GFX11-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 +; GFX11-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23 +; GFX11-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25 +; GFX11-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27 ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 ; GFX11-NEXT: s_cbranch_vccnz .LBB23_3 ; GFX11-NEXT: .LBB23_2: ; %cmp.true -; GFX11-NEXT: v_pk_add_f16 v30, 0x200, s27 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v15, 0x200, s27 op_sel_hi:[0,1] ; GFX11-NEXT: v_pk_add_f16 v14, 0x200, s26 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v176, 0x200, v176 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v177, 0x200, v177 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v178, 0x200, v178 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v179, 0x200, v179 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v180, 0x200, v180 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v181, 0x200, v181 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v182, 0x200, v182 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v183, 0x200, v183 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v170, 0x200, v170 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v171, 0x200, v171 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v172, 0x200, v172 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v173, 0x200, v173 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v174, 0x200, v174 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v175, 0x200, v175 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v184, 0x200, v184 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v151, 0x200, s25 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v137, 0x200, s24 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v124, 0x200, s23 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v112, 0x200, s22 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v101, 0x200, s21 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v91, 0x200, s20 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v82, 0x200, s19 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v74, 0x200, s18 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v67, 0x200, s17 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v61, 0x200, s16 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v56, 0x200, s3 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v52, 0x200, s2 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v49, 0x200, s1 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v47, 0x200, s0 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v13, 0x200, s25 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v12, 0x200, s24 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v11, 0x200, s23 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v10, 0x200, s22 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v9, 0x200, s21 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v8, 0x200, s20 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v7, 0x200, s19 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v6, 0x200, s18 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v5, 0x200, s17 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v4, 0x200, s16 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v31, 0x200, v31 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v30, 0x200, v30 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v29, 0x200, v29 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v28, 0x200, v28 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v27, 0x200, v27 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v26, 0x200, v26 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v25, 0x200, v25 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v24, 0x200, v24 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v17, 0x200, v17 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v16, 0x200, v16 op_sel_hi:[0,1] ; GFX11-NEXT: .LBB23_3: ; %end -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_dual_mov_b32 v0, v47 :: v_dual_mov_b32 v1, v49 -; GFX11-NEXT: v_dual_mov_b32 v3, v56 :: v_dual_mov_b32 v4, v61 -; GFX11-NEXT: v_dual_mov_b32 v6, v74 :: v_dual_mov_b32 v9, v101 -; GFX11-NEXT: v_dual_mov_b32 v7, v82 :: v_dual_mov_b32 v8, v91 -; GFX11-NEXT: v_dual_mov_b32 v11, v124 :: v_dual_mov_b32 v12, v137 -; GFX11-NEXT: v_dual_mov_b32 v15, v30 :: v_dual_mov_b32 v16, v184 -; GFX11-NEXT: v_dual_mov_b32 v17, v185 :: v_dual_mov_b32 v18, v175 -; GFX11-NEXT: v_dual_mov_b32 v19, v174 :: v_dual_mov_b32 v20, v173 -; GFX11-NEXT: v_dual_mov_b32 v21, v172 :: v_dual_mov_b32 v22, v171 -; GFX11-NEXT: v_dual_mov_b32 v23, v170 :: v_dual_mov_b32 v24, v183 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v185, off, s32 -; GFX11-NEXT: scratch_load_b32 v184, off, s32 offset:4 -; GFX11-NEXT: scratch_load_b32 v175, off, s32 offset:8 -; GFX11-NEXT: scratch_load_b32 v174, off, s32 offset:12 -; GFX11-NEXT: scratch_load_b32 v173, off, s32 offset:16 -; GFX11-NEXT: scratch_load_b32 v172, off, s32 offset:20 -; GFX11-NEXT: scratch_load_b32 v171, off, s32 offset:24 -; GFX11-NEXT: scratch_load_b32 v170, off, s32 offset:28 -; GFX11-NEXT: scratch_load_b32 v169, off, s32 offset:32 -; GFX11-NEXT: scratch_load_b32 v168, off, s32 offset:36 -; GFX11-NEXT: scratch_load_b32 v159, off, s32 offset:40 -; GFX11-NEXT: scratch_load_b32 v158, off, s32 offset:44 -; GFX11-NEXT: scratch_load_b32 v157, off, s32 offset:48 -; GFX11-NEXT: scratch_load_b32 v156, off, s32 offset:52 -; GFX11-NEXT: scratch_load_b32 v155, off, s32 offset:56 -; GFX11-NEXT: scratch_load_b32 v154, off, s32 offset:60 -; GFX11-NEXT: scratch_load_b32 v153, off, s32 offset:64 -; GFX11-NEXT: scratch_load_b32 v152, off, s32 offset:68 -; GFX11-NEXT: scratch_load_b32 v143, off, s32 offset:72 -; GFX11-NEXT: scratch_load_b32 v142, off, s32 offset:76 -; GFX11-NEXT: scratch_load_b32 v141, off, s32 offset:80 -; GFX11-NEXT: scratch_load_b32 v140, off, s32 offset:84 -; GFX11-NEXT: scratch_load_b32 v139, off, s32 offset:88 -; GFX11-NEXT: scratch_load_b32 v138, off, s32 offset:92 -; GFX11-NEXT: scratch_load_b32 v137, off, s32 offset:96 -; GFX11-NEXT: scratch_load_b32 v136, off, s32 offset:100 -; GFX11-NEXT: scratch_load_b32 v127, off, s32 offset:104 -; GFX11-NEXT: scratch_load_b32 v126, off, s32 offset:108 -; GFX11-NEXT: scratch_load_b32 v125, off, s32 offset:112 -; GFX11-NEXT: scratch_load_b32 v124, off, s32 offset:116 -; GFX11-NEXT: scratch_load_b32 v123, off, s32 offset:120 -; GFX11-NEXT: scratch_load_b32 v122, off, s32 offset:124 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v121, off, s32 offset:128 -; GFX11-NEXT: scratch_load_b32 v120, off, s32 offset:132 -; GFX11-NEXT: scratch_load_b32 v111, off, s32 offset:136 -; GFX11-NEXT: scratch_load_b32 v110, off, s32 offset:140 -; GFX11-NEXT: scratch_load_b32 v109, off, s32 offset:144 -; GFX11-NEXT: scratch_load_b32 v108, off, s32 offset:148 -; GFX11-NEXT: scratch_load_b32 v107, off, s32 offset:152 -; GFX11-NEXT: scratch_load_b32 v106, off, s32 offset:156 -; GFX11-NEXT: scratch_load_b32 v105, off, s32 offset:160 -; GFX11-NEXT: scratch_load_b32 v104, off, s32 offset:164 -; GFX11-NEXT: scratch_load_b32 v95, off, s32 offset:168 -; GFX11-NEXT: scratch_load_b32 v94, off, s32 offset:172 -; GFX11-NEXT: scratch_load_b32 v93, off, s32 offset:176 -; GFX11-NEXT: scratch_load_b32 v92, off, s32 offset:180 -; GFX11-NEXT: scratch_load_b32 v91, off, s32 offset:184 -; GFX11-NEXT: scratch_load_b32 v90, off, s32 offset:188 -; GFX11-NEXT: scratch_load_b32 v89, off, s32 offset:192 -; GFX11-NEXT: scratch_load_b32 v88, off, s32 offset:196 -; GFX11-NEXT: scratch_load_b32 v79, off, s32 offset:200 -; GFX11-NEXT: scratch_load_b32 v78, off, s32 offset:204 -; GFX11-NEXT: scratch_load_b32 v77, off, s32 offset:208 -; GFX11-NEXT: scratch_load_b32 v76, off, s32 offset:212 -; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:216 -; GFX11-NEXT: scratch_load_b32 v74, off, s32 offset:220 -; GFX11-NEXT: scratch_load_b32 v73, off, s32 offset:224 -; GFX11-NEXT: scratch_load_b32 v72, off, s32 offset:228 -; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:232 -; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:236 -; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:240 -; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:244 -; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:248 -; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:252 -; GFX11-NEXT: s_clause 0x9 ; 40-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:256 -; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:260 -; GFX11-NEXT: scratch_load_b32 v47, off, s32 offset:264 -; GFX11-NEXT: scratch_load_b32 v46, off, s32 offset:268 -; GFX11-NEXT: scratch_load_b32 v45, off, s32 offset:272 -; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:276 -; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:280 -; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:284 -; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:288 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:292 -; GFX11-NEXT: v_dual_mov_b32 v2, v52 :: v_dual_mov_b32 v5, v67 -; GFX11-NEXT: v_dual_mov_b32 v10, v112 :: v_dual_mov_b32 v13, v151 -; GFX11-NEXT: v_dual_mov_b32 v25, v182 :: v_dual_mov_b32 v26, v181 -; GFX11-NEXT: v_dual_mov_b32 v27, v180 :: v_dual_mov_b32 v28, v179 -; GFX11-NEXT: v_dual_mov_b32 v29, v178 :: v_dual_mov_b32 v30, v177 -; GFX11-NEXT: v_mov_b32_e32 v31, v176 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; GFX11-NEXT: .LBB23_4: -; GFX11-NEXT: ; implicit-def: $vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78 -; GFX11-NEXT: ; implicit-def: $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79 -; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-NEXT: ; implicit-def: $vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81 -; GFX11-NEXT: ; implicit-def: $vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84 -; GFX11-NEXT: ; implicit-def: $vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88 -; GFX11-NEXT: ; implicit-def: $vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93 -; GFX11-NEXT: ; implicit-def: $vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99 -; GFX11-NEXT: ; implicit-def: $vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106 -; GFX11-NEXT: ; implicit-def: $vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114 -; GFX11-NEXT: ; implicit-def: $vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123 -; GFX11-NEXT: ; implicit-def: $vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133 -; GFX11-NEXT: ; implicit-def: $vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144 -; GFX11-NEXT: ; implicit-def: $vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156 -; GFX11-NEXT: ; implicit-def: $vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169 +; GFX11-NEXT: ; implicit-def: $vgpr0 +; GFX11-NEXT: ; implicit-def: $vgpr1 +; GFX11-NEXT: ; implicit-def: $vgpr2 +; GFX11-NEXT: ; implicit-def: $vgpr3 +; GFX11-NEXT: ; implicit-def: $vgpr4 +; GFX11-NEXT: ; implicit-def: $vgpr5 +; GFX11-NEXT: ; implicit-def: $vgpr6 +; GFX11-NEXT: ; implicit-def: $vgpr7 +; GFX11-NEXT: ; implicit-def: $vgpr8 +; GFX11-NEXT: ; implicit-def: $vgpr9 +; GFX11-NEXT: ; implicit-def: $vgpr10 +; GFX11-NEXT: ; implicit-def: $vgpr11 +; GFX11-NEXT: ; implicit-def: $vgpr12 +; GFX11-NEXT: ; implicit-def: $vgpr13 +; GFX11-NEXT: ; implicit-def: $vgpr14 +; GFX11-NEXT: ; implicit-def: $vgpr15 ; GFX11-NEXT: s_branch .LBB23_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -37758,252 +37126,80 @@ define inreg <32 x i32> @bitcast_v64i16_to_v32i32_scalar(<64 x i16> inreg %a, i3 ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 -; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 -; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 -; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 -; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 -; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 -; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 -; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 -; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 -; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 -; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 -; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 -; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 -; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 -; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 -; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 -; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 -; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 -; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 -; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 -; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 -; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 -; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 -; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 -; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 -; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 -; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 -; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 -; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 -; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 -; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 -; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 -; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 -; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 -; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 -; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 -; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 -; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 -; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 -; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 -; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 -; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 -; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 -; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 -; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 -; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 -; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 -; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 -; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 -; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 -; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 -; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 -; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 -; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 -; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 -; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 -; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 -; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 -; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 -; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 -; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 -; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 -; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 -; GFX11-NEXT: s_clause 0x9 ; 40-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 -; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 -; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 -; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 -; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 -; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 -; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 -; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 -; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 -; GFX11-NEXT: scratch_store_b32 off, v185, s32 -; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 -; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 -; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 -; GFX11-NEXT: v_dual_mov_b32 v182, v7 :: v_dual_mov_b32 v183, v6 -; GFX11-NEXT: v_dual_mov_b32 v170, v5 :: v_dual_mov_b32 v171, v4 -; GFX11-NEXT: v_dual_mov_b32 v172, v3 :: v_dual_mov_b32 v173, v2 -; GFX11-NEXT: v_dual_mov_b32 v174, v1 :: v_dual_mov_b32 v175, v0 -; GFX11-NEXT: v_dual_mov_b32 v184, s28 :: v_dual_mov_b32 v185, s29 +; GFX11-NEXT: v_dual_mov_b32 v31, v13 :: v_dual_mov_b32 v30, v12 +; GFX11-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 +; GFX11-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29 ; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo ; GFX11-NEXT: s_cbranch_scc0 .LBB27_4 ; GFX11-NEXT: ; %bb.1: ; %cmp.false -; GFX11-NEXT: v_dual_mov_b32 v47, s0 :: v_dual_mov_b32 v52, s2 -; GFX11-NEXT: v_dual_mov_b32 v49, s1 :: v_dual_mov_b32 v56, s3 -; GFX11-NEXT: v_dual_mov_b32 v61, s16 :: v_dual_mov_b32 v74, s18 -; GFX11-NEXT: v_dual_mov_b32 v67, s17 :: v_dual_mov_b32 v82, s19 -; GFX11-NEXT: v_dual_mov_b32 v91, s20 :: v_dual_mov_b32 v112, s22 -; GFX11-NEXT: v_dual_mov_b32 v101, s21 :: v_dual_mov_b32 v124, s23 -; GFX11-NEXT: v_dual_mov_b32 v137, s24 :: v_dual_mov_b32 v14, s26 -; GFX11-NEXT: v_dual_mov_b32 v151, s25 :: v_dual_mov_b32 v30, s27 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17 +; GFX11-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v7, s19 +; GFX11-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 +; GFX11-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23 +; GFX11-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25 +; GFX11-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27 ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 ; GFX11-NEXT: s_cbranch_vccnz .LBB27_3 ; GFX11-NEXT: .LBB27_2: ; %cmp.true -; GFX11-NEXT: v_pk_add_u16 v30, s27, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v15, s27, 3 op_sel_hi:[1,0] ; GFX11-NEXT: v_pk_add_u16 v14, s26, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v176, v176, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v177, v177, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v178, v178, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v179, v179, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v180, v180, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v181, v181, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v182, v182, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v183, v183, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v170, v170, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v171, v171, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v172, v172, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v173, v173, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v174, v174, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v175, v175, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v184, v184, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v151, s25, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v137, s24, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v124, s23, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v112, s22, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v101, s21, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v91, s20, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v82, s19, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v74, s18, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v67, s17, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v61, s16, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v56, s3, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v52, s2, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v49, s1, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v47, s0, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v13, s25, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v12, s24, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v11, s23, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v10, s22, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v9, s21, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v8, s20, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v7, s19, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v6, s18, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v5, s17, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v4, s16, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v31, v31, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v30, v30, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v29, v29, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v28, v28, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v27, v27, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v26, v26, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v25, v25, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v24, v24, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v17, v17, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v16, v16, 3 op_sel_hi:[1,0] ; GFX11-NEXT: .LBB27_3: ; %end -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_dual_mov_b32 v0, v47 :: v_dual_mov_b32 v1, v49 -; GFX11-NEXT: v_dual_mov_b32 v3, v56 :: v_dual_mov_b32 v4, v61 -; GFX11-NEXT: v_dual_mov_b32 v6, v74 :: v_dual_mov_b32 v9, v101 -; GFX11-NEXT: v_dual_mov_b32 v7, v82 :: v_dual_mov_b32 v8, v91 -; GFX11-NEXT: v_dual_mov_b32 v11, v124 :: v_dual_mov_b32 v12, v137 -; GFX11-NEXT: v_dual_mov_b32 v15, v30 :: v_dual_mov_b32 v16, v184 -; GFX11-NEXT: v_dual_mov_b32 v17, v185 :: v_dual_mov_b32 v18, v175 -; GFX11-NEXT: v_dual_mov_b32 v19, v174 :: v_dual_mov_b32 v20, v173 -; GFX11-NEXT: v_dual_mov_b32 v21, v172 :: v_dual_mov_b32 v22, v171 -; GFX11-NEXT: v_dual_mov_b32 v23, v170 :: v_dual_mov_b32 v24, v183 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v185, off, s32 -; GFX11-NEXT: scratch_load_b32 v184, off, s32 offset:4 -; GFX11-NEXT: scratch_load_b32 v175, off, s32 offset:8 -; GFX11-NEXT: scratch_load_b32 v174, off, s32 offset:12 -; GFX11-NEXT: scratch_load_b32 v173, off, s32 offset:16 -; GFX11-NEXT: scratch_load_b32 v172, off, s32 offset:20 -; GFX11-NEXT: scratch_load_b32 v171, off, s32 offset:24 -; GFX11-NEXT: scratch_load_b32 v170, off, s32 offset:28 -; GFX11-NEXT: scratch_load_b32 v169, off, s32 offset:32 -; GFX11-NEXT: scratch_load_b32 v168, off, s32 offset:36 -; GFX11-NEXT: scratch_load_b32 v159, off, s32 offset:40 -; GFX11-NEXT: scratch_load_b32 v158, off, s32 offset:44 -; GFX11-NEXT: scratch_load_b32 v157, off, s32 offset:48 -; GFX11-NEXT: scratch_load_b32 v156, off, s32 offset:52 -; GFX11-NEXT: scratch_load_b32 v155, off, s32 offset:56 -; GFX11-NEXT: scratch_load_b32 v154, off, s32 offset:60 -; GFX11-NEXT: scratch_load_b32 v153, off, s32 offset:64 -; GFX11-NEXT: scratch_load_b32 v152, off, s32 offset:68 -; GFX11-NEXT: scratch_load_b32 v143, off, s32 offset:72 -; GFX11-NEXT: scratch_load_b32 v142, off, s32 offset:76 -; GFX11-NEXT: scratch_load_b32 v141, off, s32 offset:80 -; GFX11-NEXT: scratch_load_b32 v140, off, s32 offset:84 -; GFX11-NEXT: scratch_load_b32 v139, off, s32 offset:88 -; GFX11-NEXT: scratch_load_b32 v138, off, s32 offset:92 -; GFX11-NEXT: scratch_load_b32 v137, off, s32 offset:96 -; GFX11-NEXT: scratch_load_b32 v136, off, s32 offset:100 -; GFX11-NEXT: scratch_load_b32 v127, off, s32 offset:104 -; GFX11-NEXT: scratch_load_b32 v126, off, s32 offset:108 -; GFX11-NEXT: scratch_load_b32 v125, off, s32 offset:112 -; GFX11-NEXT: scratch_load_b32 v124, off, s32 offset:116 -; GFX11-NEXT: scratch_load_b32 v123, off, s32 offset:120 -; GFX11-NEXT: scratch_load_b32 v122, off, s32 offset:124 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v121, off, s32 offset:128 -; GFX11-NEXT: scratch_load_b32 v120, off, s32 offset:132 -; GFX11-NEXT: scratch_load_b32 v111, off, s32 offset:136 -; GFX11-NEXT: scratch_load_b32 v110, off, s32 offset:140 -; GFX11-NEXT: scratch_load_b32 v109, off, s32 offset:144 -; GFX11-NEXT: scratch_load_b32 v108, off, s32 offset:148 -; GFX11-NEXT: scratch_load_b32 v107, off, s32 offset:152 -; GFX11-NEXT: scratch_load_b32 v106, off, s32 offset:156 -; GFX11-NEXT: scratch_load_b32 v105, off, s32 offset:160 -; GFX11-NEXT: scratch_load_b32 v104, off, s32 offset:164 -; GFX11-NEXT: scratch_load_b32 v95, off, s32 offset:168 -; GFX11-NEXT: scratch_load_b32 v94, off, s32 offset:172 -; GFX11-NEXT: scratch_load_b32 v93, off, s32 offset:176 -; GFX11-NEXT: scratch_load_b32 v92, off, s32 offset:180 -; GFX11-NEXT: scratch_load_b32 v91, off, s32 offset:184 -; GFX11-NEXT: scratch_load_b32 v90, off, s32 offset:188 -; GFX11-NEXT: scratch_load_b32 v89, off, s32 offset:192 -; GFX11-NEXT: scratch_load_b32 v88, off, s32 offset:196 -; GFX11-NEXT: scratch_load_b32 v79, off, s32 offset:200 -; GFX11-NEXT: scratch_load_b32 v78, off, s32 offset:204 -; GFX11-NEXT: scratch_load_b32 v77, off, s32 offset:208 -; GFX11-NEXT: scratch_load_b32 v76, off, s32 offset:212 -; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:216 -; GFX11-NEXT: scratch_load_b32 v74, off, s32 offset:220 -; GFX11-NEXT: scratch_load_b32 v73, off, s32 offset:224 -; GFX11-NEXT: scratch_load_b32 v72, off, s32 offset:228 -; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:232 -; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:236 -; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:240 -; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:244 -; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:248 -; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:252 -; GFX11-NEXT: s_clause 0x9 ; 40-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:256 -; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:260 -; GFX11-NEXT: scratch_load_b32 v47, off, s32 offset:264 -; GFX11-NEXT: scratch_load_b32 v46, off, s32 offset:268 -; GFX11-NEXT: scratch_load_b32 v45, off, s32 offset:272 -; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:276 -; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:280 -; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:284 -; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:288 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:292 -; GFX11-NEXT: v_dual_mov_b32 v2, v52 :: v_dual_mov_b32 v5, v67 -; GFX11-NEXT: v_dual_mov_b32 v10, v112 :: v_dual_mov_b32 v13, v151 -; GFX11-NEXT: v_dual_mov_b32 v25, v182 :: v_dual_mov_b32 v26, v181 -; GFX11-NEXT: v_dual_mov_b32 v27, v180 :: v_dual_mov_b32 v28, v179 -; GFX11-NEXT: v_dual_mov_b32 v29, v178 :: v_dual_mov_b32 v30, v177 -; GFX11-NEXT: v_mov_b32_e32 v31, v176 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; GFX11-NEXT: .LBB27_4: -; GFX11-NEXT: ; implicit-def: $vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78 -; GFX11-NEXT: ; implicit-def: $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79 -; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-NEXT: ; implicit-def: $vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81 -; GFX11-NEXT: ; implicit-def: $vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84 -; GFX11-NEXT: ; implicit-def: $vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88 -; GFX11-NEXT: ; implicit-def: $vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93 -; GFX11-NEXT: ; implicit-def: $vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99 -; GFX11-NEXT: ; implicit-def: $vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106 -; GFX11-NEXT: ; implicit-def: $vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114 -; GFX11-NEXT: ; implicit-def: $vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123 -; GFX11-NEXT: ; implicit-def: $vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133 -; GFX11-NEXT: ; implicit-def: $vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144 -; GFX11-NEXT: ; implicit-def: $vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156 -; GFX11-NEXT: ; implicit-def: $vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169 +; GFX11-NEXT: ; implicit-def: $vgpr0 +; GFX11-NEXT: ; implicit-def: $vgpr1 +; GFX11-NEXT: ; implicit-def: $vgpr2 +; GFX11-NEXT: ; implicit-def: $vgpr3 +; GFX11-NEXT: ; implicit-def: $vgpr4 +; GFX11-NEXT: ; implicit-def: $vgpr5 +; GFX11-NEXT: ; implicit-def: $vgpr6 +; GFX11-NEXT: ; implicit-def: $vgpr7 +; GFX11-NEXT: ; implicit-def: $vgpr8 +; GFX11-NEXT: ; implicit-def: $vgpr9 +; GFX11-NEXT: ; implicit-def: $vgpr10 +; GFX11-NEXT: ; implicit-def: $vgpr11 +; GFX11-NEXT: ; implicit-def: $vgpr12 +; GFX11-NEXT: ; implicit-def: $vgpr13 +; GFX11-NEXT: ; implicit-def: $vgpr14 +; GFX11-NEXT: ; implicit-def: $vgpr15 ; GFX11-NEXT: s_branch .LBB27_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -67245,1076 +66441,704 @@ define inreg <32 x float> @bitcast_v64bf16_to_v32f32_scalar(<64 x bfloat> inreg ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:156 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:28 -; GFX11-TRUE16-NEXT: s_clause 0x6 ; 28-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v167, v13 :: v_dual_mov_b32 v176, v12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v177, v11 :: v_dual_mov_b32 v178, v10 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v179, v9 :: v_dual_mov_b32 v180, v8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v181, v7 :: v_dual_mov_b32 v182, v6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v183, v5 :: v_dual_mov_b32 v168, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v169, v3 :: v_dual_mov_b32 v170, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v171, v1 :: v_dual_mov_b32 v172, v0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v174, s28 :: v_dual_mov_b32 v173, s29 -; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 -; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB43_4 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v135, s0 :: v_dual_mov_b32 v134, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v132, s2 :: v_dual_mov_b32 v129, s3 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v125, s16 :: v_dual_mov_b32 v120, s17 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v114, s18 :: v_dual_mov_b32 v107, s19 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v99, s20 :: v_dual_mov_b32 v90, s21 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v80, s22 :: v_dual_mov_b32 v69, s23 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v57, s24 :: v_dual_mov_b32 v44, s25 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s26 :: v_dual_mov_b32 v15, s27 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB43_3 -; GFX11-TRUE16-NEXT: .LBB43_2: ; %cmp.true -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s27, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s27, 0xffff0000 -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s5 -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s26, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s26, 16 -; GFX11-TRUE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: v_bfe_u32 v4, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_bfe_u32 v7, v3, 16, 1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v4, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s25, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s25, 16 -; GFX11-TRUE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x7fff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s5 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s24, 0xffff0000 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v2, v8 :: v_dual_add_nc_u32 v7, v7, v3 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v3 -; GFX11-TRUE16-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v4, v9, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s7 -; GFX11-TRUE16-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v15, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, 0x400000, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x7fff, v7 -; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v4, 16, 1 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s24, 16 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v2, v7, v2 :: v_dual_add_nc_u32 v7, v8, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, v9, v6 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x7fff, v8 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v6, v7, v8, vcc_lo -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc_lo -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v6 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v1.l -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc_lo -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v44, 16, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v44.h, v4.l -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s23, 0xffff0000 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v57, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v57.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s23, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s22, 0xffff0000 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v69, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v69.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s22, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s21, 0xffff0000 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v80, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v80.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s21, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s20, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v90, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v90.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s20, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s19, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v99, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v99.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s19, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s18, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v107, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v107.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s17, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v114, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v114.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s17, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s16, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v120, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v120.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s16, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s3, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v125, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v125.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s2, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v129, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v129.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s3 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s2 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s1, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v132, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v132.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s2 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s0, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s0, s0, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v134, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v134.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s1 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s0 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v135, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v135.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v167 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v167 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v167, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v167.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v176 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v176 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v176, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v176.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v177 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v13 :: v_dual_mov_b32 v30, v12 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29 +; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 +; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo +; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB43_4 +; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v7, s19 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 +; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB43_3 +; GFX11-TRUE16-NEXT: .LBB43_2: ; %cmp.true +; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s27, 16 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s27, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s5 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s26, 0xffff0000 +; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s26, 16 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v1, 16, 1 +; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v0, 16, 1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v0 ; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v177 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, 0x400000, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, v5, v1 +; GFX11-TRUE16-NEXT: v_bfe_u32 v8, v3, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s6 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, 0x400000, v3 +; GFX11-TRUE16-NEXT: s_and_b32 s5, s25, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 0x7fff, v5 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v0 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s5 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s25, 16 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v177, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v177.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v178 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v178 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v2, v7, vcc_lo ; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v178, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v178.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v179 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v179 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 +; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v4, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v5, v9, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, v8, v3 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v6, 16, 1 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v15, 16, v1 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x7fff, v8 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v5, v6 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s24, 0xffff0000 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, v7, v10, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v179, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v179.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v180 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v180 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v6 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s24, 16 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v7, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v9, v5 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v7 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v14, 16, v1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v3, v7 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v5 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s23, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v4, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s23, 16 +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v8, 16, 1 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v13, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s22, 0xffff0000 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v5, v8 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v180, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v180.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v181 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v181 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v9, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v7, v3, 16, 1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v8 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s22, 16 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v9, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v7, v3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v9 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v12, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v5, v9 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v0.l +; GFX11-TRUE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s21, 0xffff0000 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v181, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v181.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v182 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v182 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-TRUE16-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s21, 16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v3 +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v6, 16, 1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v182, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v182.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v183 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v183 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v183, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v183.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v168 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v168 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v8, v5 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v11, 16, v1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v3, v6 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v5 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s20, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v4, 16, 1 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s20, 16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v6 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v8, 16, 1 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v10, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s19, 0xffff0000 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v5, v8 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v168, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v168.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v169 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v169 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v7, v3, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v8 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v7, v3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v32 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v32, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s19, 16 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v9, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v5, v32 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v0.l +; GFX11-TRUE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s18, 0xffff0000 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v169, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v169.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v170 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v170 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-TRUE16-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s18, 16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v3 +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v6, 16, 1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v8, v5 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v8, 16, v1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v3, v6 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v5 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s17, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v6 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s17, 16 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v32, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v4, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v32 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v5, v32 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v4 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v170, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v170.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v171 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v3, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s16, 0xffff0000 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v171 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v33, v3 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v34, 16, 1 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v5, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v3 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s3, 0xffff0000 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v171, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v171.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v172 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v172 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_add_f32_e64 v33, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v32, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 16 +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v33, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s3 +; GFX11-TRUE16-NEXT: s_and_b32 s3, s2, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v35, v32 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v35, 0x40c00000, s3 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v3, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v32 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v33 +; GFX11-TRUE16-NEXT: v_bfe_u32 v32, v35, 16, 1 +; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s2, 16 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v33, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v34 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v32, v35 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s2 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v34 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v172, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v172.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v173 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: s_and_b32 s2, s1, 0xffff0000 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v173 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_add_f32_e64 v37, 0x40c00000, s2 +; GFX11-TRUE16-NEXT: v_bfe_u32 v36, v32, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v37, 16, 1 +; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 16 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v33, vcc_lo +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v36, v32 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v34, v37 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s1 +; GFX11-TRUE16-NEXT: s_and_b32 s1, s0, 0xffff0000 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v0.l +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v33, 0x400000, v32 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v35, 0x40c00000, s1 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v173, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v173.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v174 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v33, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v32, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-TRUE16-NEXT: s_lshl_b32 s0, s0, 16 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v2, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, v38, v34 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v32, v35 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v33, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v31 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v36 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v36, 0x40c00000, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v33, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v36, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v35, 0x400000, v36 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v0, v37 :: v_dual_add_nc_u32 v33, v33, v36 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v32.l +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v30 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v0 +; GFX11-TRUE16-NEXT: v_bfe_u32 v0, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v30, 16, v30 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v33, v33, v35 :: v_dual_add_nc_u32 v0, v0, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v31, 16, v31 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v33 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v32.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v34, v36, v37 :: v_dual_add_f32 v31, 0x40c00000, v31 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v30, 0x40c00000, v30 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v33, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v31, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v31, v31 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, v38, v31 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v29 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 16, v29 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v36 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v31 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v29, 0x40c00000, v29 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v31, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v30, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v30 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v34, v30 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_and_b32 v37, 0xffff0000, v28 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.h, v33.l +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v30, v30 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v28, 16, v28 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v30, v34, v36 :: v_dual_add_nc_u32 v33, v33, v35 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v29, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v30 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_dual_add_f32 v28, 0x40c00000, v28 :: v_dual_add_nc_u32 v35, v37, v29 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v32.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v33, v33, v36 :: v_dual_add_nc_u32 v32, v38, v34 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v29 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v29, v29 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v27 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v29, v35, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v28, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v28 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v29 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_add_nc_u32 v35, v35, v28 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v26 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v26, 16, v26 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 16, v27 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v33.l +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v28, v28 +; GFX11-TRUE16-NEXT: v_dual_add_f32 v26, 0x40c00000, v26 :: v_dual_add_f32 v27, 0x40c00000, v27 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v33, v34 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v28, v35, v36 :: v_dual_add_f32 v35, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v27, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v37, v27 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v28 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v32.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v27 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v27, v27 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v25 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v27, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v26, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v26 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v34, v26 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v27 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_and_b32 v37, 0xffff0000, v24 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v24, 16, v24 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 16, v25 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v33.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v26, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_dual_add_f32 v24, 0x40c00000, v24 :: v_dual_add_f32 v25, 0x40c00000, v25 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v26 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v32.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v25, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v34 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v23 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v23, 16, v23 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v33, v35 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v37, v25 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v23, 0x40c00000, v23 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v25 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v25, v35, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v24, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v24 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v25 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v33.l +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_and_b32 v37, 0xffff0000, v22 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v22, 16, v22 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v35, v24 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v22, 0x40c00000, v22 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v174 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v24, v35, v36 :: v_dual_add_nc_u32 v33, v33, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v23, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v37, v23 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v24 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v32.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v23 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v23, v23 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v21 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v23, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v22, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v22 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v34, v22 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v23 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_and_b32 v37, 0xffff0000, v20 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v22, v22 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v33.l +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v20, 16, v20 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v22, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v33, v35 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v22, 16, v22 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v32.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v20, 0x40c00000, v20 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v34 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v19 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v21, 16, v21 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v19, 16, v19 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_f32_e32 v21, 0x40c00000, v21 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_add_f32_e32 v19, 0x40c00000, v19 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v21, 16, 1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v21 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v21, v21 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v37, v21 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v174, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v174.h, v0.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v21, v35, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v20, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v20 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v21, 16, v21 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_add_nc_u32 v35, v35, v20 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v33.l +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v18 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v20, v20 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v18, 16, v18 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v33, v34 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v20, v35, v36 :: v_dual_add_f32 v35, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v19, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v37, v19 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v18, 0x40c00000, v18 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v20, 16, v20 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v19 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v32.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v35 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v17 +; GFX11-TRUE16-NEXT: v_bfe_u32 v39, v18, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v19, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v38, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v39, v18 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v19, 16, v19 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v33.l +; GFX11-TRUE16-NEXT: v_bfe_u32 v36, v34, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v38 :: v_dual_add_nc_u32 v33, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v35, 0x400000, v18 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 16, v17 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v16 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v18, v18 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, v36, v34 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_add_f32 v17, 0x40c00000, v17 :: v_dual_lshlrev_b32 v16, 16, v16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v18, v33, v35 :: v_dual_add_f32 v33, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v36 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v17, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v33, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v16, 0x40c00000, v16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v39, 0x400000, v17 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v18, 16, v18 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v34, v35, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, v37, v17 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v37, v38, v33 +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v16, 16, 1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v38, 0x400000, v33 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v37, 0x7fff, v37 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v35, v16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v48, 0x400000, v16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v32.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v37, v38, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v17, v17 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v17, v36, v39, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v16, v16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v17, 16, v17 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v16, v35, v48, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v34.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v16, 16, v16 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v33.l ; GFX11-TRUE16-NEXT: .LBB43_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, v125 :: v_dual_mov_b32 v5, v120 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, v114 :: v_dual_mov_b32 v7, v107 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, v99 :: v_dual_mov_b32 v9, v90 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, v57 :: v_dual_mov_b32 v13, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v30 :: v_dual_mov_b32 v17, v173 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, v174 :: v_dual_mov_b32 v19, v171 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v172 :: v_dual_mov_b32 v21, v169 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, v170 :: v_dual_mov_b32 v23, v183 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v22, v168 :: v_dual_mov_b32 v25, v181 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0x6 ; 28-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:280 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, v135 :: v_dual_mov_b32 v1, v134 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, v132 :: v_dual_mov_b32 v3, v129 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, v80 :: v_dual_mov_b32 v11, v69 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, v182 :: v_dual_mov_b32 v27, v179 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v180 :: v_dual_mov_b32 v29, v177 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v178 :: v_dual_mov_b32 v31, v167 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v30, v176 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB43_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 ; GFX11-TRUE16-NEXT: s_branch .LBB43_2 ; ; GFX11-FAKE16-LABEL: bitcast_v64bf16_to_v32f32_scalar: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 -; GFX11-FAKE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:288 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:284 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:280 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:276 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:272 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:268 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:264 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:260 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:256 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:252 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:248 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:244 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:240 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:236 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:232 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:228 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:224 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:220 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:216 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:212 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:208 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:204 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:200 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:196 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:192 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:188 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:184 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:180 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:176 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:172 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:168 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:164 -; GFX11-FAKE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:160 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:156 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:152 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:148 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:144 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:140 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:136 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:132 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:128 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:124 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:120 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:116 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:112 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:108 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:104 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:100 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:96 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:92 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:88 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v139, s32 offset:84 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v140, s32 offset:80 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v141, s32 offset:76 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v142, s32 offset:72 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v143, s32 offset:68 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v152, s32 offset:64 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v153, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v154, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v155, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v156, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v157, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v158, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v159, s32 offset:36 -; GFX11-FAKE16-NEXT: s_clause 0x8 ; 36-byte Folded Spill -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v168, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v169, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v170, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v171, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v172, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v173, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v174, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v175, s32 offset:4 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v184, s32 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v178, v13 :: v_dual_mov_b32 v179, v12 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v180, v11 :: v_dual_mov_b32 v181, v9 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v182, v10 :: v_dual_mov_b32 v169, v7 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v170, v8 :: v_dual_mov_b32 v177, v3 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v176, v6 :: v_dual_mov_b32 v171, v4 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v174, v5 :: v_dual_mov_b32 v173, v0 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v184, v2 :: v_dual_mov_b32 v175, v1 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v183, s28 :: v_dual_mov_b32 v172, s29 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v31, v13 :: v_dual_mov_b32 v30, v12 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29 ; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 ; GFX11-FAKE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo ; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB43_4 ; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, s0 :: v_dual_mov_b32 v37, s2 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, s1 :: v_dual_mov_b32 v41, s3 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v46, s16 :: v_dual_mov_b32 v59, s18 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v52, s17 :: v_dual_mov_b32 v67, s19 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v76, s20 :: v_dual_mov_b32 v97, s22 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v86, s21 :: v_dual_mov_b32 v109, s23 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v122, s24 :: v_dual_mov_b32 v151, s26 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v136, s25 :: v_dual_mov_b32 v15, s27 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v7, s19 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27 ; GFX11-FAKE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB43_3 ; GFX11-FAKE16-NEXT: .LBB43_2: ; %cmp.true @@ -68322,762 +67146,674 @@ define inreg <32 x float> @bitcast_v64bf16_to_v32f32_scalar(<64 x bfloat> inreg ; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s27, 16 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s5 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s26, 16 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s26, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s6 +; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s26, 16 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 ; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v1, 16, 1 ; GFX11-FAKE16-NEXT: v_bfe_u32 v2, v0, 16, 1 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s6 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, 0x400000, v1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, 0x400000, v0 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v9, v3, 16, 1 ; GFX11-FAKE16-NEXT: v_bfe_u32 v10, v5, 16, 1 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-FAKE16-NEXT: v_bfe_u32 v9, v3, 16, 1 -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s25, 16 +; GFX11-FAKE16-NEXT: s_and_b32 s5, s25, 0xffff0000 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x7fff, v4 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, v2, v0 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s25, 0xffff0000 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s24, 0xffff0000 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v51, 0xffff0000, v183 +; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s25, 16 +; GFX11-FAKE16-NEXT: s_and_b32 s4, s24, 0xffff0000 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s24, 16 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v8, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, v9, v3 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, v10, v5 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, v9, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v3 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: v_bfe_u32 v10, v6, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v4, v7, vcc_lo +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v1, v4, v7 :: v_dual_add_nc_u32 v2, 0x7fff, v2 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v5 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x7fff, v8 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 +; GFX11-FAKE16-NEXT: v_bfe_u32 v5, v6, 16, 1 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 -; GFX11-FAKE16-NEXT: v_bfe_u32 v3, v8, 16, 1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v8, 16, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-FAKE16-NEXT: v_lshl_or_b32 v15, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, v3, v8 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v4 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v5, v6 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v7, v8 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s23, 16 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v14, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_bfe_u32 v8, v7, 16, 1 +; GFX11-FAKE16-NEXT: s_and_b32 s4, s23, 0xffff0000 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v8, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s22, 16 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v6 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v8, 16, 1 +; GFX11-FAKE16-NEXT: s_and_b32 s4, s22, 0xffff0000 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v6, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v7, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v6 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v13, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_bfe_u32 v8, v7, 16, 1 +; GFX11-FAKE16-NEXT: s_and_b32 s4, s21, 0xffff0000 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v8, v7 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s21, 16 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v12, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v6 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v8, 16, 1 +; GFX11-FAKE16-NEXT: s_and_b32 s4, s20, 0xffff0000 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s20, 16 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v6, 16, 1 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v10, v6 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v4, v7, v9, vcc_lo -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v7, v8 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v11, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v6 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v8 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s24, 16 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v6 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, 0x400000, v8 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v9, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s23, 0xffff0000 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_bfe_u32 v8, v7, 16, 1 +; GFX11-FAKE16-NEXT: s_and_b32 s4, s19, 0xffff0000 ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo ; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, 0x400000, v9 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v8, v7 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s19, 16 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v6, v1, v7 :: v_dual_and_b32 v1, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v9, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v3 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x7fff, v4 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v6 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v7, v9 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s23, 16 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v151, v0, 16, v1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v12, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x7fff, v6 -; GFX11-FAKE16-NEXT: v_bfe_u32 v11, v7, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v5, v4, v8, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s22, 0xffff0000 -; GFX11-FAKE16-NEXT: v_bfe_u32 v9, v12, 16, 1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, v11, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xffff, v3 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc_lo -; GFX11-FAKE16-NEXT: v_add_f32_e64 v10, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s22, 16 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v5 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v11, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x7fff, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, v9, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v7 -; GFX11-FAKE16-NEXT: v_bfe_u32 v14, v10, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v10, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v6 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, 0x400000, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 0x7fff, v8 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s21, 0xffff0000 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v7, v6, v9, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v9, v11, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v12, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, v14, v10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff, v5 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v7 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v8, v8, v13 :: v_dual_add_nc_u32 v7, v9, v11 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 0x7fff, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, 0x400000, v10 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v13, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s21, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x7fff, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, 0x400000, v11 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v16, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v10, v13, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v8 -; GFX11-FAKE16-NEXT: v_bfe_u32 v12, v16, 16, 1 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s20, 0xffff0000 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v11, v7, v14 :: v_dual_add_nc_u32 v10, v10, v13 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff, v8 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v9 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, 0x400000, v13 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 0x7fff, v10 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, v12, v16 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v12, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v13, v13 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s20, 16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, 0x400000, v16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x7fff, v11 -; GFX11-FAKE16-NEXT: v_bfe_u32 v18, v12, 16, 1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v19, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v13, v10, v14, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v16, v16 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s19, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, v18, v12 -; GFX11-FAKE16-NEXT: v_bfe_u32 v16, v19, 16, 1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xffff, v9 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v11, v11, v17, vcc_lo -; GFX11-FAKE16-NEXT: v_add_f32_e64 v17, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s19, 16 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v13 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 0x7fff, v14 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, v16, v19 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, 0x400000, v12 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v18, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_bfe_u32 v21, v17, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v12, v12 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v11, 16, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, 0x7fff, v14 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v20, 0x400000, v19 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v8, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s18, 0xffff0000 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v13, v13, v16, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v16, v18, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v19, v21, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v11, 16, v13 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v14, v14, v20 :: v_dual_add_nc_u32 v13, v16, v18 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 0x7fff, v19 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v19, 0x400000, v17 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v20, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v17, v17 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s18, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 0x7fff, v13 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v21, 0x400000, v18 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v22, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v16, v16, v19, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v17, v20, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v18, v18 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v14 -; GFX11-FAKE16-NEXT: v_bfe_u32 v19, v22, 16, 1 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s18, 16 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v6, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v7, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v6 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v9, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v32, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_bfe_u32 v8, v7, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s17, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, v17, v20 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v18, v13, v21 :: v_dual_and_b32 v13, 0xffff, v14 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v21, 0x400000, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x7fff, v17 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v16, 16, v18 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, v19, v22 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v19, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v20, v20 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s17, 16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v23, 0x400000, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, 0x7fff, v18 -; GFX11-FAKE16-NEXT: v_bfe_u32 v24, v19, 16, 1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v25, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v20, v17, v21, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v22, v22 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s17, 16 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v33, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v32, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v8, v7 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v8, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v6 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s5 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s16, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, v24, v19 -; GFX11-FAKE16-NEXT: v_bfe_u32 v22, v25, 16, 1 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v18, v18, v23 :: v_dual_and_b32 v17, 0xffff, v16 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v23, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s16, 16 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v16, 16, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 0x7fff, v21 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, v22, v25 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, 0x400000, v19 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v24, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_bfe_u32 v27, v23, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v18, 16, v18 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, 0x7fff, v21 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v26, 0x400000, v25 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s16, 16 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v6, 16, 1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v32, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v34, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v6 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v2, v2, v33 :: v_dual_add_nc_u32 v5, v7, v32 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v7, v0, 16, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v32 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v33, 0x40c00000, s5 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s3, 0xffff0000 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v20, v20, v22, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v22, v24, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v25, v27, v23 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v18 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v18, 16, v20 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v21, v21, v26 :: v_dual_add_nc_u32 v20, v22, v24 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x7fff, v25 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v25, 0x400000, v23 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v26, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v23, v23 ; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 0x7fff, v20 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, 0x400000, v24 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v28, 0x40c00000, s3 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v22, v22, v25, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v23, v26, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v21, 16, v21 -; GFX11-FAKE16-NEXT: v_bfe_u32 v25, v28, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v33, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v6, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v34, vcc_lo +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s3 ; GFX11-FAKE16-NEXT: s_and_b32 s3, s2, 0xffff0000 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v24, v20, v27 :: v_dual_add_nc_u32 v23, v23, v26 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v20, 0xffff, v21 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v21, 16, v22 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, 0x400000, v26 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v23, 0x7fff, v23 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v22, 16, v24 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v24, v25, v28 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v25, 0x40c00000, s3 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v32, v33 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v34, 16, 1 ; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v29, 0x400000, v28 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v24, 0x7fff, v24 -; GFX11-FAKE16-NEXT: v_bfe_u32 v30, v25, 16, 1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v31, 0x40c00000, s2 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v26, v23, v27, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v28, v28 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v33, v34 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v32, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v5, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v32 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v32 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v35, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e64 v35, 0x40c00000, s2 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v34 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v33, 0x40c00000, s3 +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s2, s1, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, v30, v25 -; GFX11-FAKE16-NEXT: v_bfe_u32 v28, v31, 16, 1 ; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v24, v24, v29, vcc_lo -; GFX11-FAKE16-NEXT: v_add_f32_e64 v29, 0x40c00000, s2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v23, 0xffff, v22 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v22, 16, v26 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v26, 0x7fff, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, v28, v31 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v28, 0x400000, v25 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v30, 0x40c00000, s1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v29, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v24, 16, v24 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 0x7fff, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v32, 0x400000, v31 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s0, 0xffff0000 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v26, v26, v28, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v28, v30, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v31, v31 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v33, v29 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v25, 0xffff, v24 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v24, 16, v26 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v27, v27, v32 :: v_dual_add_nc_u32 v26, v28, v30 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v33, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v35 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v33 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, 0x7fff, v31 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v31, 0x400000, v29 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v29, v29 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v4, v33 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v36, vcc_lo +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v4, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v36, 0x40c00000, s1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v32, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s2 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v36, 16, 1 +; GFX11-FAKE16-NEXT: s_and_b32 s1, s0, 0xffff0000 ; GFX11-FAKE16-NEXT: s_lshl_b32 s0, s0, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v26, 0x7fff, v26 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v33, 0x400000, v30 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s0 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v28, v28, v31, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v29, v32, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v30, v30 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v31, v34, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v27, 16, v27 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v30, v26, v33, vcc_lo -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v26, 16, v28 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, v29, v32 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v33, 16, v178 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v30 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v30, v31, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff0000, v178 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, 0x7fff, v28 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v33, 0x40c00000, v33 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v109, v5, 16, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v30, 0x7fff, v30 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v31, 0x40c00000, v31 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v28, v35, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v33, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v32, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v34, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v36 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v38, 0x40c00000, s0 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v32, v34 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v3, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v33 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v33, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v35 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v35, 0x40c00000, s1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v38, 16, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v32, v33, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v28, 0xffff, v29 +; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v2, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v2, v0, 16, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, v33, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v36, v38 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v38 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v34 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff0000, v31 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v31, 16, v31 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v32 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_dual_add_f32 v34, 0x40c00000, v34 :: v_dual_add_f32 v31, 0x40c00000, v31 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v34, 16, 1 ; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v31, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v37, v33 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v30, v30, v36, vcc_lo -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v36, 16, v179 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v35, v31 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v179 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v31 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v1, v1, 16, v32 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v36, v34 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v31 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v30 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v30, 16, v30 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v0, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v31 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 ; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v32, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e32 v30, 0x40c00000, v30 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v31, v31 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v180 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v31, v34, v38, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v36, 16, 1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v38, 16, v180 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_add_f32 v37, 0x40c00000, v37 :: v_dual_add_nc_u32 v34, v34, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v33, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v38 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v33 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v34 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v31, v35, v37 :: v_dual_add_nc_u32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v30, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, v33 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v35, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v29 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v30 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v32, v32, v38 :: v_dual_lshlrev_b32 v29, 16, v29 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v30 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_f32_e32 v29, 0x40c00000, v29 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v30, v30 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v37, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v30, v34, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v29, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v31, v32, 16, v31 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v30, 16, v30 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v178, v31, 16, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v36, v37 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v33, v33, v48 :: v_dual_lshlrev_b32 v36, 16, v182 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v33, v33, v38 :: v_dual_add_nc_u32 v32, v34, v35 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v35, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v182 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v34 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x7fff, v31 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v179, v32, 16, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v30, 0xffff, v30 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v136, v2, 16, v4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v36, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v36, v29 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v28 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v28 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v29 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v29, v29 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v31, v31, v48 :: v_dual_add_nc_u32 v38, v38, v35 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v37, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v181 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v38 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v38, 16, v181 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v37, 0x40c00000, v37 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v38 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v32, v39, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e32 v28, 0x40c00000, v28 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v30, 0xffff, v30 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v29, v34, v37, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v33 -; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v37, 16, 1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v48, vcc_lo -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v180, v31, 16, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v35, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v170 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v36, v38 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_add_f32 v35, 0x40c00000, v35 :: v_dual_lshlrev_b32 v36, 16, v170 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v28, 16, 1 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v30, v33, 16, v30 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v28 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v32, v32, v38 :: v_dual_add_nc_u32 v33, v35, v28 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v27 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v27 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v36, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v182, v31, 16, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, v38, v35 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v39, v36 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v48, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v28, v28 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v29 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v27, 0x40c00000, v27 :: v_dual_cndmask_b32 v28, v33, v37 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x7fff, v31 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v29, 0xffff, v29 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v27, 16, 1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v28, 16, v28 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v34, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v29, v32, 16, v29 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v27 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v27, v27 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v169 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v31, v31, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v34, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v36, v27 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v26 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v26 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v28, 0xffff, v28 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v26, 0x40c00000, v26 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v28, v32, 16, v28 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v27, v34, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v26, 16, 1 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v39, 16, v169 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v181, v32, 16, v33 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v37, 0x40c00000, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v176 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v48, vcc_lo -; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v39 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 -; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v37, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v34 -; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v36, 16, 1 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v38 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v35, v37 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v35, 16, v176 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v32, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v25 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v27, 16, v27 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v36 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v37, v26 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v26 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v49, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v25 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v26, v32, v37, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v27, 0xffff, v27 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v170, v33, 16, v31 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v49, v35 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v32, v32, v48 :: v_dual_add_nc_u32 v33, v37, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v174 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v25, 0x40c00000, v25 :: v_dual_lshlrev_b32 v36, 16, v24 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff0000, v24 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v34, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v27, v33, 16, v27 +; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v25, 16, 1 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v35 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v33, v25 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v26, 16, v26 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v24, 0x40c00000, v24 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v25 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v36, 16, 1 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v26, 0xffff, v26 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v24, 16, 1 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v23 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v23 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v25, v33, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v39, v36 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v26, v32, 16, v26 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v34 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v35, v24 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v35, 0x40c00000, v37 :: v_dual_cndmask_b32 v34, v34, v36 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v36, 16, v174 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v32 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v36, 0x40c00000, v36 :: v_dual_cndmask_b32 v33, v33, v39 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v169, v31, 16, v32 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, v37, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v34 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v31, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v25, 16, v25 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v24 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v23, 0x40c00000, v23 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v35, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v37 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v25, 0xffff, v25 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v22 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff0000, v171 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v22 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v24, v34, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v25, v32, 16, v25 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v33 +; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v23, 16, 1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v24, 16, v24 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v35 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v38, 16, v177 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v31, v36 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v176, v33, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v37 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v32, v23 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v24, v24, 16, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v34 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v34, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v32, 0x40c00000, v32 :: v_dual_lshlrev_b32 v37, 16, v171 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x7fff, v31 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v38 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v22, 0x40c00000, v22 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v36 ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v34, vcc_lo -; GFX11-FAKE16-NEXT: v_or_b32_e32 v34, 0x400000, v36 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v37 -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v32, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v50, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v31, v31, v34 :: v_dual_add_nc_u32 v36, v37, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v177 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v37, 0x40c00000, v37 :: v_dual_add_nc_u32 v34, v34, v35 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v34, 0x400000, v23 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v23, v23 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v22 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v36, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v23, v32, v34 :: v_dual_add_nc_u32 v34, v35, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v21 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 16, v21 +; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v22, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 -; GFX11-FAKE16-NEXT: v_bfe_u32 v49, v37, 16, 1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v50, v38 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v50, 16, v184 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_dual_add_f32 v21, 0x40c00000, v21 :: v_dual_add_nc_u32 v32, v32, v22 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v23, 16, v23 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v37, vcc_lo ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v34, v34, v48 :: v_dual_add_nc_u32 v35, v49, v37 +; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v48, v21, 16, 1 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v22, v22 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v21 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v23, 0xffff, v23 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v48, v21 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v22, v32, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v39, v35 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v20 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v48, 0xffff0000, v184 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v20, 16, v20 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v21, v21 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v22, 16, v22 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v20, 0x40c00000, v20 :: v_dual_cndmask_b32 v21, v36, v37 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v38 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xffff, v22 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v49, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v36, v36, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v37, 0x40c00000, v50 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 +; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v20, 16, 1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v21, 16, v21 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v32, v39, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v23, v33, 16, v23 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v22, v34, 16, v22 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v21, 0xffff, v21 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v36 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v38, 0x40c00000, v48 :: v_dual_cndmask_b32 v35, v35, v49 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v36, 16, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v34 -; GFX11-FAKE16-NEXT: v_bfe_u32 v48, v37, 16, 1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v35, 16, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff, v36 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v174, v33, 16, v31 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v171, v32, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v48, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff0000, v175 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v34, 16, v175 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v39, v39, v38 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v177, v35, 16, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x7fff, v31 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v33, 0x40c00000, v33 :: v_dual_add_f32 v34, 0x40c00000, v34 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v39 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v38 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v37, v20 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff0000, v19 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v19, 16, v19 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v20, v20 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v21, v32, 16, v21 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v20 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v19, 0x40c00000, v19 :: v_dual_add_f32 v34, 0x40c00000, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v36 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v20, v33, v35 :: v_dual_and_b32 v33, 0xffff0000, v18 +; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v19, 16, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v34, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v33, 0x40c00000, v33 :: v_dual_lshlrev_b32 v18, 16, v18 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v38, v19 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_add_nc_u32 v35, v35, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v19 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v33, 16, 1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v34, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v31, v31, v35, vcc_lo -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v173 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v48, 16, v173 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v49, 0x400000, v33 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v35, 0x40c00000, v35 :: v_dual_cndmask_b32 v32, v32, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v37, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, v39, v34 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v34 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v18, 0x40c00000, v18 :: v_dual_add_nc_u32 v37, v37, v33 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v19, v36, v38, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, 0x7fff, v37 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v38, v38, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v122, v3, 16, v6 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v37, v39, vcc_lo +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v20, 16, v20 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v37 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v33 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v18 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v35, v39, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v18, 16, 1 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, 0x7fff, v38 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v48 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v33, v36, v49 :: v_dual_lshlrev_b32 v48, 16, v183 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v49, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v48, 0x40c00000, v48 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v35, v37, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v172 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v39, 16, v172 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v36, v38 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v55, 0x400000, v48 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v19, 16, v19 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v20, 0xffff, v20 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v18 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v36, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v17 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 16, v17 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v37, 16, v16 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v18, v18 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_dual_add_f32 v36, 0x40c00000, v36 :: v_dual_add_f32 v17, 0x40c00000, v17 ; GFX11-FAKE16-NEXT: v_add_f32_e32 v37, 0x40c00000, v37 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v39, 0x40c00000, v39 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v34 -; GFX11-FAKE16-NEXT: v_bfe_u32 v50, v37, 16, 1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v39, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v36, v36, v49, vcc_lo -; GFX11-FAKE16-NEXT: v_or_b32_e32 v54, 0x400000, v39 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v39, v39 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v50, 0x40c00000, v51 :: v_dual_add_nc_u32 v49, v50, v37 -; GFX11-FAKE16-NEXT: v_bfe_u32 v51, v48, 16, 1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v38, v38, v39 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v53, 0x400000, v37 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xffff0000, v16 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v18, v35, v38, vcc_lo ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v49, 0x7fff, v49 -; GFX11-FAKE16-NEXT: v_bfe_u32 v52, v50, 16, 1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v51, v51, v48 +; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v17, 16, 1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v50, 0x400000, v17 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v16, 0x40c00000, v16 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v17, v17 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v38, v39, v36 +; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v37, 16, 1 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v17 +; GFX11-FAKE16-NEXT: v_bfe_u32 v48, v16, 16, 1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v51, 0x400000, v37 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v38, 0x7fff, v38 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v36, 16, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v35, 16, v35 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v52, v52, v50 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v38, v38, v54 :: v_dual_add_nc_u32 v51, 0x7fff, v51 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v48, v48 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v39, 0x7fff, v52 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v52, 0x400000, v50 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v38, 16, v38 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v48, v51, v55, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v39, v39, v37 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v49, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v48, v48, v16 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v18, 16, v18 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v39, 0x7fff, v39 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v17, v35, v50, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v184, v32, 16, v31 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v175, v33, 16, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v38, 0xffff, v38 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v48, 16, v48 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v37, v49, v53, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v50, v50 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v173, v35, 16, v36 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v97, v8, 16, v10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v48, 0xffff, v48 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v37, 16, v37 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v39, v39, v52, vcc_lo -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v86, v9, 16, v12 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v76, v11, 16, v13 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v67, v14, 16, v17 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v172, v37, 16, v38 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v39, 16, v39 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v59, v16, 16, v19 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v52, v18, 16, v20 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v46, v21, 16, v23 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v41, v22, 16, v25 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v183, v39, 16, v48 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v37, v24, 16, v27 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v34, v26, 16, v28 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v32, v29, 16, v30 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v48 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v16 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v17, 16, v17 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v37, v39, v51, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v19 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v18, 0xffff, v18 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xffff, v17 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v36, v38, v49, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v16, v16 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v20, v32, 16, v20 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v19, v34, 16, v19 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v18, v33, 16, v18 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v36, 16, v36 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v16, v35, v48, vcc_lo +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v35, 16, v37 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v17, v36, 16, v17 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v16, 16, v16 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff, v35 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v16, v16, 16, v35 ; GFX11-FAKE16-NEXT: .LBB43_3: ; %end -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v3, v41 :: v_dual_mov_b32 v4, v46 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v6, v59 :: v_dual_mov_b32 v9, v86 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v7, v67 :: v_dual_mov_b32 v8, v76 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v10, v97 :: v_dual_mov_b32 v13, v136 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v11, v109 :: v_dual_mov_b32 v12, v122 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v14, v151 :: v_dual_mov_b32 v17, v172 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v18, v173 :: v_dual_mov_b32 v19, v175 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v20, v184 :: v_dual_mov_b32 v23, v174 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v22, v171 :: v_dual_mov_b32 v25, v169 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v26, v170 :: v_dual_mov_b32 v29, v180 -; GFX11-FAKE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-FAKE16-NEXT: scratch_load_b32 v184, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_b32 v175, off, s32 offset:4 -; GFX11-FAKE16-NEXT: scratch_load_b32 v174, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_b32 v173, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_b32 v172, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_b32 v171, off, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_load_b32 v170, off, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_load_b32 v169, off, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_load_b32 v168, off, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_load_b32 v159, off, s32 offset:36 -; GFX11-FAKE16-NEXT: scratch_load_b32 v158, off, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_load_b32 v157, off, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_load_b32 v156, off, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_load_b32 v155, off, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_load_b32 v154, off, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_load_b32 v153, off, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_load_b32 v152, off, s32 offset:64 -; GFX11-FAKE16-NEXT: scratch_load_b32 v143, off, s32 offset:68 -; GFX11-FAKE16-NEXT: scratch_load_b32 v142, off, s32 offset:72 -; GFX11-FAKE16-NEXT: scratch_load_b32 v141, off, s32 offset:76 -; GFX11-FAKE16-NEXT: scratch_load_b32 v140, off, s32 offset:80 -; GFX11-FAKE16-NEXT: scratch_load_b32 v139, off, s32 offset:84 -; GFX11-FAKE16-NEXT: scratch_load_b32 v138, off, s32 offset:88 -; GFX11-FAKE16-NEXT: scratch_load_b32 v137, off, s32 offset:92 -; GFX11-FAKE16-NEXT: scratch_load_b32 v136, off, s32 offset:96 -; GFX11-FAKE16-NEXT: scratch_load_b32 v127, off, s32 offset:100 -; GFX11-FAKE16-NEXT: scratch_load_b32 v126, off, s32 offset:104 -; GFX11-FAKE16-NEXT: scratch_load_b32 v125, off, s32 offset:108 -; GFX11-FAKE16-NEXT: scratch_load_b32 v124, off, s32 offset:112 -; GFX11-FAKE16-NEXT: scratch_load_b32 v123, off, s32 offset:116 -; GFX11-FAKE16-NEXT: scratch_load_b32 v122, off, s32 offset:120 -; GFX11-FAKE16-NEXT: scratch_load_b32 v121, off, s32 offset:124 -; GFX11-FAKE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-FAKE16-NEXT: scratch_load_b32 v120, off, s32 offset:128 -; GFX11-FAKE16-NEXT: scratch_load_b32 v111, off, s32 offset:132 -; GFX11-FAKE16-NEXT: scratch_load_b32 v110, off, s32 offset:136 -; GFX11-FAKE16-NEXT: scratch_load_b32 v109, off, s32 offset:140 -; GFX11-FAKE16-NEXT: scratch_load_b32 v108, off, s32 offset:144 -; GFX11-FAKE16-NEXT: scratch_load_b32 v107, off, s32 offset:148 -; GFX11-FAKE16-NEXT: scratch_load_b32 v106, off, s32 offset:152 -; GFX11-FAKE16-NEXT: scratch_load_b32 v105, off, s32 offset:156 -; GFX11-FAKE16-NEXT: scratch_load_b32 v104, off, s32 offset:160 -; GFX11-FAKE16-NEXT: scratch_load_b32 v95, off, s32 offset:164 -; GFX11-FAKE16-NEXT: scratch_load_b32 v94, off, s32 offset:168 -; GFX11-FAKE16-NEXT: scratch_load_b32 v93, off, s32 offset:172 -; GFX11-FAKE16-NEXT: scratch_load_b32 v92, off, s32 offset:176 -; GFX11-FAKE16-NEXT: scratch_load_b32 v91, off, s32 offset:180 -; GFX11-FAKE16-NEXT: scratch_load_b32 v90, off, s32 offset:184 -; GFX11-FAKE16-NEXT: scratch_load_b32 v89, off, s32 offset:188 -; GFX11-FAKE16-NEXT: scratch_load_b32 v88, off, s32 offset:192 -; GFX11-FAKE16-NEXT: scratch_load_b32 v79, off, s32 offset:196 -; GFX11-FAKE16-NEXT: scratch_load_b32 v78, off, s32 offset:200 -; GFX11-FAKE16-NEXT: scratch_load_b32 v77, off, s32 offset:204 -; GFX11-FAKE16-NEXT: scratch_load_b32 v76, off, s32 offset:208 -; GFX11-FAKE16-NEXT: scratch_load_b32 v75, off, s32 offset:212 -; GFX11-FAKE16-NEXT: scratch_load_b32 v74, off, s32 offset:216 -; GFX11-FAKE16-NEXT: scratch_load_b32 v73, off, s32 offset:220 -; GFX11-FAKE16-NEXT: scratch_load_b32 v72, off, s32 offset:224 -; GFX11-FAKE16-NEXT: scratch_load_b32 v63, off, s32 offset:228 -; GFX11-FAKE16-NEXT: scratch_load_b32 v62, off, s32 offset:232 -; GFX11-FAKE16-NEXT: scratch_load_b32 v61, off, s32 offset:236 -; GFX11-FAKE16-NEXT: scratch_load_b32 v60, off, s32 offset:240 -; GFX11-FAKE16-NEXT: scratch_load_b32 v59, off, s32 offset:244 -; GFX11-FAKE16-NEXT: scratch_load_b32 v58, off, s32 offset:248 -; GFX11-FAKE16-NEXT: scratch_load_b32 v57, off, s32 offset:252 -; GFX11-FAKE16-NEXT: s_clause 0x8 ; 36-byte Folded Reload -; GFX11-FAKE16-NEXT: scratch_load_b32 v56, off, s32 offset:256 -; GFX11-FAKE16-NEXT: scratch_load_b32 v47, off, s32 offset:260 -; GFX11-FAKE16-NEXT: scratch_load_b32 v46, off, s32 offset:264 -; GFX11-FAKE16-NEXT: scratch_load_b32 v45, off, s32 offset:268 -; GFX11-FAKE16-NEXT: scratch_load_b32 v44, off, s32 offset:272 -; GFX11-FAKE16-NEXT: scratch_load_b32 v43, off, s32 offset:276 -; GFX11-FAKE16-NEXT: scratch_load_b32 v42, off, s32 offset:280 -; GFX11-FAKE16-NEXT: scratch_load_b32 v41, off, s32 offset:284 -; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s32 offset:288 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, v32 :: v_dual_mov_b32 v1, v34 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, v37 :: v_dual_mov_b32 v5, v52 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v16, v183 :: v_dual_mov_b32 v21, v177 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v24, v176 :: v_dual_mov_b32 v27, v181 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v28, v182 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v30, v179 :: v_dual_mov_b32 v31, v178 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-FAKE16-NEXT: .LBB43_4: -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr15 ; GFX11-FAKE16-NEXT: s_branch .LBB43_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -72767,252 +71503,80 @@ define inreg <32 x float> @bitcast_v64f16_to_v32f32_scalar(<64 x half> inreg %a, ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 -; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 -; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 -; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 -; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 -; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 -; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 -; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 -; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 -; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 -; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 -; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 -; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 -; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 -; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 -; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 -; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 -; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 -; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 -; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 -; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 -; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 -; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 -; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 -; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 -; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 -; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 -; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 -; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 -; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 -; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 -; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 -; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 -; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 -; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 -; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 -; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 -; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 -; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 -; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 -; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 -; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 -; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 -; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 -; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 -; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 -; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 -; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 -; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 -; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 -; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 -; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 -; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 -; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 -; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 -; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 -; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 -; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 -; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 -; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 -; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 -; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 -; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 -; GFX11-NEXT: s_clause 0x9 ; 40-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 -; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 -; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 -; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 -; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 -; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 -; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 -; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 -; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 -; GFX11-NEXT: scratch_store_b32 off, v185, s32 -; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 -; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 -; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 -; GFX11-NEXT: v_dual_mov_b32 v182, v7 :: v_dual_mov_b32 v183, v6 -; GFX11-NEXT: v_dual_mov_b32 v170, v5 :: v_dual_mov_b32 v171, v4 -; GFX11-NEXT: v_dual_mov_b32 v172, v3 :: v_dual_mov_b32 v173, v2 -; GFX11-NEXT: v_dual_mov_b32 v174, v1 :: v_dual_mov_b32 v175, v0 -; GFX11-NEXT: v_dual_mov_b32 v184, s28 :: v_dual_mov_b32 v185, s29 +; GFX11-NEXT: v_dual_mov_b32 v31, v13 :: v_dual_mov_b32 v30, v12 +; GFX11-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 +; GFX11-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29 ; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo ; GFX11-NEXT: s_cbranch_scc0 .LBB47_4 ; GFX11-NEXT: ; %bb.1: ; %cmp.false -; GFX11-NEXT: v_dual_mov_b32 v47, s0 :: v_dual_mov_b32 v52, s2 -; GFX11-NEXT: v_dual_mov_b32 v49, s1 :: v_dual_mov_b32 v56, s3 -; GFX11-NEXT: v_dual_mov_b32 v61, s16 :: v_dual_mov_b32 v74, s18 -; GFX11-NEXT: v_dual_mov_b32 v67, s17 :: v_dual_mov_b32 v82, s19 -; GFX11-NEXT: v_dual_mov_b32 v91, s20 :: v_dual_mov_b32 v112, s22 -; GFX11-NEXT: v_dual_mov_b32 v101, s21 :: v_dual_mov_b32 v124, s23 -; GFX11-NEXT: v_dual_mov_b32 v137, s24 :: v_dual_mov_b32 v14, s26 -; GFX11-NEXT: v_dual_mov_b32 v151, s25 :: v_dual_mov_b32 v30, s27 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17 +; GFX11-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v7, s19 +; GFX11-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 +; GFX11-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23 +; GFX11-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25 +; GFX11-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27 ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 ; GFX11-NEXT: s_cbranch_vccnz .LBB47_3 ; GFX11-NEXT: .LBB47_2: ; %cmp.true -; GFX11-NEXT: v_pk_add_f16 v30, 0x200, s27 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v15, 0x200, s27 op_sel_hi:[0,1] ; GFX11-NEXT: v_pk_add_f16 v14, 0x200, s26 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v176, 0x200, v176 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v177, 0x200, v177 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v178, 0x200, v178 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v179, 0x200, v179 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v180, 0x200, v180 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v181, 0x200, v181 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v182, 0x200, v182 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v183, 0x200, v183 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v170, 0x200, v170 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v171, 0x200, v171 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v172, 0x200, v172 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v173, 0x200, v173 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v174, 0x200, v174 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v175, 0x200, v175 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v184, 0x200, v184 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v151, 0x200, s25 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v137, 0x200, s24 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v124, 0x200, s23 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v112, 0x200, s22 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v101, 0x200, s21 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v91, 0x200, s20 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v82, 0x200, s19 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v74, 0x200, s18 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v67, 0x200, s17 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v61, 0x200, s16 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v56, 0x200, s3 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v52, 0x200, s2 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v49, 0x200, s1 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v47, 0x200, s0 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v13, 0x200, s25 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v12, 0x200, s24 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v11, 0x200, s23 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v10, 0x200, s22 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v9, 0x200, s21 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v8, 0x200, s20 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v7, 0x200, s19 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v6, 0x200, s18 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v5, 0x200, s17 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v4, 0x200, s16 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v31, 0x200, v31 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v30, 0x200, v30 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v29, 0x200, v29 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v28, 0x200, v28 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v27, 0x200, v27 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v26, 0x200, v26 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v25, 0x200, v25 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v24, 0x200, v24 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v17, 0x200, v17 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v16, 0x200, v16 op_sel_hi:[0,1] ; GFX11-NEXT: .LBB47_3: ; %end -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_dual_mov_b32 v0, v47 :: v_dual_mov_b32 v1, v49 -; GFX11-NEXT: v_dual_mov_b32 v3, v56 :: v_dual_mov_b32 v4, v61 -; GFX11-NEXT: v_dual_mov_b32 v6, v74 :: v_dual_mov_b32 v9, v101 -; GFX11-NEXT: v_dual_mov_b32 v7, v82 :: v_dual_mov_b32 v8, v91 -; GFX11-NEXT: v_dual_mov_b32 v11, v124 :: v_dual_mov_b32 v12, v137 -; GFX11-NEXT: v_dual_mov_b32 v15, v30 :: v_dual_mov_b32 v16, v184 -; GFX11-NEXT: v_dual_mov_b32 v17, v185 :: v_dual_mov_b32 v18, v175 -; GFX11-NEXT: v_dual_mov_b32 v19, v174 :: v_dual_mov_b32 v20, v173 -; GFX11-NEXT: v_dual_mov_b32 v21, v172 :: v_dual_mov_b32 v22, v171 -; GFX11-NEXT: v_dual_mov_b32 v23, v170 :: v_dual_mov_b32 v24, v183 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v185, off, s32 -; GFX11-NEXT: scratch_load_b32 v184, off, s32 offset:4 -; GFX11-NEXT: scratch_load_b32 v175, off, s32 offset:8 -; GFX11-NEXT: scratch_load_b32 v174, off, s32 offset:12 -; GFX11-NEXT: scratch_load_b32 v173, off, s32 offset:16 -; GFX11-NEXT: scratch_load_b32 v172, off, s32 offset:20 -; GFX11-NEXT: scratch_load_b32 v171, off, s32 offset:24 -; GFX11-NEXT: scratch_load_b32 v170, off, s32 offset:28 -; GFX11-NEXT: scratch_load_b32 v169, off, s32 offset:32 -; GFX11-NEXT: scratch_load_b32 v168, off, s32 offset:36 -; GFX11-NEXT: scratch_load_b32 v159, off, s32 offset:40 -; GFX11-NEXT: scratch_load_b32 v158, off, s32 offset:44 -; GFX11-NEXT: scratch_load_b32 v157, off, s32 offset:48 -; GFX11-NEXT: scratch_load_b32 v156, off, s32 offset:52 -; GFX11-NEXT: scratch_load_b32 v155, off, s32 offset:56 -; GFX11-NEXT: scratch_load_b32 v154, off, s32 offset:60 -; GFX11-NEXT: scratch_load_b32 v153, off, s32 offset:64 -; GFX11-NEXT: scratch_load_b32 v152, off, s32 offset:68 -; GFX11-NEXT: scratch_load_b32 v143, off, s32 offset:72 -; GFX11-NEXT: scratch_load_b32 v142, off, s32 offset:76 -; GFX11-NEXT: scratch_load_b32 v141, off, s32 offset:80 -; GFX11-NEXT: scratch_load_b32 v140, off, s32 offset:84 -; GFX11-NEXT: scratch_load_b32 v139, off, s32 offset:88 -; GFX11-NEXT: scratch_load_b32 v138, off, s32 offset:92 -; GFX11-NEXT: scratch_load_b32 v137, off, s32 offset:96 -; GFX11-NEXT: scratch_load_b32 v136, off, s32 offset:100 -; GFX11-NEXT: scratch_load_b32 v127, off, s32 offset:104 -; GFX11-NEXT: scratch_load_b32 v126, off, s32 offset:108 -; GFX11-NEXT: scratch_load_b32 v125, off, s32 offset:112 -; GFX11-NEXT: scratch_load_b32 v124, off, s32 offset:116 -; GFX11-NEXT: scratch_load_b32 v123, off, s32 offset:120 -; GFX11-NEXT: scratch_load_b32 v122, off, s32 offset:124 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v121, off, s32 offset:128 -; GFX11-NEXT: scratch_load_b32 v120, off, s32 offset:132 -; GFX11-NEXT: scratch_load_b32 v111, off, s32 offset:136 -; GFX11-NEXT: scratch_load_b32 v110, off, s32 offset:140 -; GFX11-NEXT: scratch_load_b32 v109, off, s32 offset:144 -; GFX11-NEXT: scratch_load_b32 v108, off, s32 offset:148 -; GFX11-NEXT: scratch_load_b32 v107, off, s32 offset:152 -; GFX11-NEXT: scratch_load_b32 v106, off, s32 offset:156 -; GFX11-NEXT: scratch_load_b32 v105, off, s32 offset:160 -; GFX11-NEXT: scratch_load_b32 v104, off, s32 offset:164 -; GFX11-NEXT: scratch_load_b32 v95, off, s32 offset:168 -; GFX11-NEXT: scratch_load_b32 v94, off, s32 offset:172 -; GFX11-NEXT: scratch_load_b32 v93, off, s32 offset:176 -; GFX11-NEXT: scratch_load_b32 v92, off, s32 offset:180 -; GFX11-NEXT: scratch_load_b32 v91, off, s32 offset:184 -; GFX11-NEXT: scratch_load_b32 v90, off, s32 offset:188 -; GFX11-NEXT: scratch_load_b32 v89, off, s32 offset:192 -; GFX11-NEXT: scratch_load_b32 v88, off, s32 offset:196 -; GFX11-NEXT: scratch_load_b32 v79, off, s32 offset:200 -; GFX11-NEXT: scratch_load_b32 v78, off, s32 offset:204 -; GFX11-NEXT: scratch_load_b32 v77, off, s32 offset:208 -; GFX11-NEXT: scratch_load_b32 v76, off, s32 offset:212 -; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:216 -; GFX11-NEXT: scratch_load_b32 v74, off, s32 offset:220 -; GFX11-NEXT: scratch_load_b32 v73, off, s32 offset:224 -; GFX11-NEXT: scratch_load_b32 v72, off, s32 offset:228 -; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:232 -; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:236 -; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:240 -; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:244 -; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:248 -; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:252 -; GFX11-NEXT: s_clause 0x9 ; 40-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:256 -; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:260 -; GFX11-NEXT: scratch_load_b32 v47, off, s32 offset:264 -; GFX11-NEXT: scratch_load_b32 v46, off, s32 offset:268 -; GFX11-NEXT: scratch_load_b32 v45, off, s32 offset:272 -; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:276 -; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:280 -; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:284 -; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:288 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:292 -; GFX11-NEXT: v_dual_mov_b32 v2, v52 :: v_dual_mov_b32 v5, v67 -; GFX11-NEXT: v_dual_mov_b32 v10, v112 :: v_dual_mov_b32 v13, v151 -; GFX11-NEXT: v_dual_mov_b32 v25, v182 :: v_dual_mov_b32 v26, v181 -; GFX11-NEXT: v_dual_mov_b32 v27, v180 :: v_dual_mov_b32 v28, v179 -; GFX11-NEXT: v_dual_mov_b32 v29, v178 :: v_dual_mov_b32 v30, v177 -; GFX11-NEXT: v_mov_b32_e32 v31, v176 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; GFX11-NEXT: .LBB47_4: -; GFX11-NEXT: ; implicit-def: $vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78 -; GFX11-NEXT: ; implicit-def: $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79 -; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-NEXT: ; implicit-def: $vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81 -; GFX11-NEXT: ; implicit-def: $vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84 -; GFX11-NEXT: ; implicit-def: $vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88 -; GFX11-NEXT: ; implicit-def: $vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93 -; GFX11-NEXT: ; implicit-def: $vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99 -; GFX11-NEXT: ; implicit-def: $vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106 -; GFX11-NEXT: ; implicit-def: $vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114 -; GFX11-NEXT: ; implicit-def: $vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123 -; GFX11-NEXT: ; implicit-def: $vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133 -; GFX11-NEXT: ; implicit-def: $vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144 -; GFX11-NEXT: ; implicit-def: $vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156 -; GFX11-NEXT: ; implicit-def: $vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169 +; GFX11-NEXT: ; implicit-def: $vgpr0 +; GFX11-NEXT: ; implicit-def: $vgpr1 +; GFX11-NEXT: ; implicit-def: $vgpr2 +; GFX11-NEXT: ; implicit-def: $vgpr3 +; GFX11-NEXT: ; implicit-def: $vgpr4 +; GFX11-NEXT: ; implicit-def: $vgpr5 +; GFX11-NEXT: ; implicit-def: $vgpr6 +; GFX11-NEXT: ; implicit-def: $vgpr7 +; GFX11-NEXT: ; implicit-def: $vgpr8 +; GFX11-NEXT: ; implicit-def: $vgpr9 +; GFX11-NEXT: ; implicit-def: $vgpr10 +; GFX11-NEXT: ; implicit-def: $vgpr11 +; GFX11-NEXT: ; implicit-def: $vgpr12 +; GFX11-NEXT: ; implicit-def: $vgpr13 +; GFX11-NEXT: ; implicit-def: $vgpr14 +; GFX11-NEXT: ; implicit-def: $vgpr15 ; GFX11-NEXT: s_branch .LBB47_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -75767,252 +74331,80 @@ define inreg <32 x float> @bitcast_v64i16_to_v32f32_scalar(<64 x i16> inreg %a, ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 -; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 -; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 -; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 -; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 -; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 -; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 -; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 -; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 -; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 -; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 -; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 -; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 -; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 -; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 -; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 -; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 -; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 -; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 -; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 -; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 -; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 -; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 -; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 -; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 -; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 -; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 -; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 -; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 -; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 -; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 -; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 -; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 -; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 -; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 -; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 -; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 -; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 -; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 -; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 -; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 -; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 -; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 -; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 -; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 -; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 -; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 -; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 -; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 -; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 -; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 -; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 -; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 -; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 -; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 -; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 -; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 -; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 -; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 -; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 -; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 -; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 -; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 -; GFX11-NEXT: s_clause 0x9 ; 40-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 -; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 -; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 -; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 -; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 -; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 -; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 -; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 -; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 -; GFX11-NEXT: scratch_store_b32 off, v185, s32 -; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 -; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 -; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 -; GFX11-NEXT: v_dual_mov_b32 v182, v7 :: v_dual_mov_b32 v183, v6 -; GFX11-NEXT: v_dual_mov_b32 v170, v5 :: v_dual_mov_b32 v171, v4 -; GFX11-NEXT: v_dual_mov_b32 v172, v3 :: v_dual_mov_b32 v173, v2 -; GFX11-NEXT: v_dual_mov_b32 v174, v1 :: v_dual_mov_b32 v175, v0 -; GFX11-NEXT: v_dual_mov_b32 v184, s28 :: v_dual_mov_b32 v185, s29 +; GFX11-NEXT: v_dual_mov_b32 v31, v13 :: v_dual_mov_b32 v30, v12 +; GFX11-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 +; GFX11-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29 ; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo ; GFX11-NEXT: s_cbranch_scc0 .LBB51_4 ; GFX11-NEXT: ; %bb.1: ; %cmp.false -; GFX11-NEXT: v_dual_mov_b32 v47, s0 :: v_dual_mov_b32 v52, s2 -; GFX11-NEXT: v_dual_mov_b32 v49, s1 :: v_dual_mov_b32 v56, s3 -; GFX11-NEXT: v_dual_mov_b32 v61, s16 :: v_dual_mov_b32 v74, s18 -; GFX11-NEXT: v_dual_mov_b32 v67, s17 :: v_dual_mov_b32 v82, s19 -; GFX11-NEXT: v_dual_mov_b32 v91, s20 :: v_dual_mov_b32 v112, s22 -; GFX11-NEXT: v_dual_mov_b32 v101, s21 :: v_dual_mov_b32 v124, s23 -; GFX11-NEXT: v_dual_mov_b32 v137, s24 :: v_dual_mov_b32 v14, s26 -; GFX11-NEXT: v_dual_mov_b32 v151, s25 :: v_dual_mov_b32 v30, s27 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17 +; GFX11-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v7, s19 +; GFX11-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 +; GFX11-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23 +; GFX11-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25 +; GFX11-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27 ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 ; GFX11-NEXT: s_cbranch_vccnz .LBB51_3 ; GFX11-NEXT: .LBB51_2: ; %cmp.true -; GFX11-NEXT: v_pk_add_u16 v30, s27, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v15, s27, 3 op_sel_hi:[1,0] ; GFX11-NEXT: v_pk_add_u16 v14, s26, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v176, v176, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v177, v177, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v178, v178, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v179, v179, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v180, v180, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v181, v181, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v182, v182, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v183, v183, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v170, v170, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v171, v171, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v172, v172, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v173, v173, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v174, v174, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v175, v175, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v184, v184, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v151, s25, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v137, s24, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v124, s23, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v112, s22, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v101, s21, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v91, s20, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v82, s19, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v74, s18, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v67, s17, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v61, s16, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v56, s3, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v52, s2, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v49, s1, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v47, s0, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v13, s25, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v12, s24, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v11, s23, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v10, s22, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v9, s21, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v8, s20, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v7, s19, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v6, s18, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v5, s17, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v4, s16, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v31, v31, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v30, v30, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v29, v29, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v28, v28, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v27, v27, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v26, v26, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v25, v25, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v24, v24, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v17, v17, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v16, v16, 3 op_sel_hi:[1,0] ; GFX11-NEXT: .LBB51_3: ; %end -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_dual_mov_b32 v0, v47 :: v_dual_mov_b32 v1, v49 -; GFX11-NEXT: v_dual_mov_b32 v3, v56 :: v_dual_mov_b32 v4, v61 -; GFX11-NEXT: v_dual_mov_b32 v6, v74 :: v_dual_mov_b32 v9, v101 -; GFX11-NEXT: v_dual_mov_b32 v7, v82 :: v_dual_mov_b32 v8, v91 -; GFX11-NEXT: v_dual_mov_b32 v11, v124 :: v_dual_mov_b32 v12, v137 -; GFX11-NEXT: v_dual_mov_b32 v15, v30 :: v_dual_mov_b32 v16, v184 -; GFX11-NEXT: v_dual_mov_b32 v17, v185 :: v_dual_mov_b32 v18, v175 -; GFX11-NEXT: v_dual_mov_b32 v19, v174 :: v_dual_mov_b32 v20, v173 -; GFX11-NEXT: v_dual_mov_b32 v21, v172 :: v_dual_mov_b32 v22, v171 -; GFX11-NEXT: v_dual_mov_b32 v23, v170 :: v_dual_mov_b32 v24, v183 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v185, off, s32 -; GFX11-NEXT: scratch_load_b32 v184, off, s32 offset:4 -; GFX11-NEXT: scratch_load_b32 v175, off, s32 offset:8 -; GFX11-NEXT: scratch_load_b32 v174, off, s32 offset:12 -; GFX11-NEXT: scratch_load_b32 v173, off, s32 offset:16 -; GFX11-NEXT: scratch_load_b32 v172, off, s32 offset:20 -; GFX11-NEXT: scratch_load_b32 v171, off, s32 offset:24 -; GFX11-NEXT: scratch_load_b32 v170, off, s32 offset:28 -; GFX11-NEXT: scratch_load_b32 v169, off, s32 offset:32 -; GFX11-NEXT: scratch_load_b32 v168, off, s32 offset:36 -; GFX11-NEXT: scratch_load_b32 v159, off, s32 offset:40 -; GFX11-NEXT: scratch_load_b32 v158, off, s32 offset:44 -; GFX11-NEXT: scratch_load_b32 v157, off, s32 offset:48 -; GFX11-NEXT: scratch_load_b32 v156, off, s32 offset:52 -; GFX11-NEXT: scratch_load_b32 v155, off, s32 offset:56 -; GFX11-NEXT: scratch_load_b32 v154, off, s32 offset:60 -; GFX11-NEXT: scratch_load_b32 v153, off, s32 offset:64 -; GFX11-NEXT: scratch_load_b32 v152, off, s32 offset:68 -; GFX11-NEXT: scratch_load_b32 v143, off, s32 offset:72 -; GFX11-NEXT: scratch_load_b32 v142, off, s32 offset:76 -; GFX11-NEXT: scratch_load_b32 v141, off, s32 offset:80 -; GFX11-NEXT: scratch_load_b32 v140, off, s32 offset:84 -; GFX11-NEXT: scratch_load_b32 v139, off, s32 offset:88 -; GFX11-NEXT: scratch_load_b32 v138, off, s32 offset:92 -; GFX11-NEXT: scratch_load_b32 v137, off, s32 offset:96 -; GFX11-NEXT: scratch_load_b32 v136, off, s32 offset:100 -; GFX11-NEXT: scratch_load_b32 v127, off, s32 offset:104 -; GFX11-NEXT: scratch_load_b32 v126, off, s32 offset:108 -; GFX11-NEXT: scratch_load_b32 v125, off, s32 offset:112 -; GFX11-NEXT: scratch_load_b32 v124, off, s32 offset:116 -; GFX11-NEXT: scratch_load_b32 v123, off, s32 offset:120 -; GFX11-NEXT: scratch_load_b32 v122, off, s32 offset:124 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v121, off, s32 offset:128 -; GFX11-NEXT: scratch_load_b32 v120, off, s32 offset:132 -; GFX11-NEXT: scratch_load_b32 v111, off, s32 offset:136 -; GFX11-NEXT: scratch_load_b32 v110, off, s32 offset:140 -; GFX11-NEXT: scratch_load_b32 v109, off, s32 offset:144 -; GFX11-NEXT: scratch_load_b32 v108, off, s32 offset:148 -; GFX11-NEXT: scratch_load_b32 v107, off, s32 offset:152 -; GFX11-NEXT: scratch_load_b32 v106, off, s32 offset:156 -; GFX11-NEXT: scratch_load_b32 v105, off, s32 offset:160 -; GFX11-NEXT: scratch_load_b32 v104, off, s32 offset:164 -; GFX11-NEXT: scratch_load_b32 v95, off, s32 offset:168 -; GFX11-NEXT: scratch_load_b32 v94, off, s32 offset:172 -; GFX11-NEXT: scratch_load_b32 v93, off, s32 offset:176 -; GFX11-NEXT: scratch_load_b32 v92, off, s32 offset:180 -; GFX11-NEXT: scratch_load_b32 v91, off, s32 offset:184 -; GFX11-NEXT: scratch_load_b32 v90, off, s32 offset:188 -; GFX11-NEXT: scratch_load_b32 v89, off, s32 offset:192 -; GFX11-NEXT: scratch_load_b32 v88, off, s32 offset:196 -; GFX11-NEXT: scratch_load_b32 v79, off, s32 offset:200 -; GFX11-NEXT: scratch_load_b32 v78, off, s32 offset:204 -; GFX11-NEXT: scratch_load_b32 v77, off, s32 offset:208 -; GFX11-NEXT: scratch_load_b32 v76, off, s32 offset:212 -; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:216 -; GFX11-NEXT: scratch_load_b32 v74, off, s32 offset:220 -; GFX11-NEXT: scratch_load_b32 v73, off, s32 offset:224 -; GFX11-NEXT: scratch_load_b32 v72, off, s32 offset:228 -; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:232 -; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:236 -; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:240 -; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:244 -; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:248 -; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:252 -; GFX11-NEXT: s_clause 0x9 ; 40-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:256 -; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:260 -; GFX11-NEXT: scratch_load_b32 v47, off, s32 offset:264 -; GFX11-NEXT: scratch_load_b32 v46, off, s32 offset:268 -; GFX11-NEXT: scratch_load_b32 v45, off, s32 offset:272 -; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:276 -; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:280 -; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:284 -; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:288 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:292 -; GFX11-NEXT: v_dual_mov_b32 v2, v52 :: v_dual_mov_b32 v5, v67 -; GFX11-NEXT: v_dual_mov_b32 v10, v112 :: v_dual_mov_b32 v13, v151 -; GFX11-NEXT: v_dual_mov_b32 v25, v182 :: v_dual_mov_b32 v26, v181 -; GFX11-NEXT: v_dual_mov_b32 v27, v180 :: v_dual_mov_b32 v28, v179 -; GFX11-NEXT: v_dual_mov_b32 v29, v178 :: v_dual_mov_b32 v30, v177 -; GFX11-NEXT: v_mov_b32_e32 v31, v176 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; GFX11-NEXT: .LBB51_4: -; GFX11-NEXT: ; implicit-def: $vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78 -; GFX11-NEXT: ; implicit-def: $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79 -; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-NEXT: ; implicit-def: $vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81 -; GFX11-NEXT: ; implicit-def: $vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84 -; GFX11-NEXT: ; implicit-def: $vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88 -; GFX11-NEXT: ; implicit-def: $vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93 -; GFX11-NEXT: ; implicit-def: $vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99 -; GFX11-NEXT: ; implicit-def: $vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106 -; GFX11-NEXT: ; implicit-def: $vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114 -; GFX11-NEXT: ; implicit-def: $vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123 -; GFX11-NEXT: ; implicit-def: $vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133 -; GFX11-NEXT: ; implicit-def: $vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144 -; GFX11-NEXT: ; implicit-def: $vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156 -; GFX11-NEXT: ; implicit-def: $vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169 +; GFX11-NEXT: ; implicit-def: $vgpr0 +; GFX11-NEXT: ; implicit-def: $vgpr1 +; GFX11-NEXT: ; implicit-def: $vgpr2 +; GFX11-NEXT: ; implicit-def: $vgpr3 +; GFX11-NEXT: ; implicit-def: $vgpr4 +; GFX11-NEXT: ; implicit-def: $vgpr5 +; GFX11-NEXT: ; implicit-def: $vgpr6 +; GFX11-NEXT: ; implicit-def: $vgpr7 +; GFX11-NEXT: ; implicit-def: $vgpr8 +; GFX11-NEXT: ; implicit-def: $vgpr9 +; GFX11-NEXT: ; implicit-def: $vgpr10 +; GFX11-NEXT: ; implicit-def: $vgpr11 +; GFX11-NEXT: ; implicit-def: $vgpr12 +; GFX11-NEXT: ; implicit-def: $vgpr13 +; GFX11-NEXT: ; implicit-def: $vgpr14 +; GFX11-NEXT: ; implicit-def: $vgpr15 ; GFX11-NEXT: s_branch .LBB51_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -103019,1123 +101411,751 @@ define inreg <16 x i64> @bitcast_v64bf16_to_v16i64_scalar(<64 x bfloat> inreg %a ; GFX9-NEXT: v_and_b32_sdwa v32, v18, v32 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX9-NEXT: v_lshl_or_b32 v32, v33, 16, v32 ; GFX9-NEXT: v_and_b32_e32 v33, 0xffff0000, v17 -; GFX9-NEXT: v_add_f32_e32 v33, 0x40c00000, v33 -; GFX9-NEXT: v_bfe_u32 v34, v33, 16, 1 -; GFX9-NEXT: v_add_u32_e32 v34, v34, v33 -; GFX9-NEXT: v_lshlrev_b32_e32 v17, 16, v17 -; GFX9-NEXT: v_add_u32_e32 v34, 0x7fff, v34 -; GFX9-NEXT: v_or_b32_e32 v35, 0x400000, v33 -; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v33, v33 -; GFX9-NEXT: v_add_f32_e32 v17, 0x40c00000, v17 -; GFX9-NEXT: v_cndmask_b32_e32 v33, v34, v35, vcc -; GFX9-NEXT: v_bfe_u32 v34, v17, 16, 1 -; GFX9-NEXT: v_add_u32_e32 v34, v34, v17 -; GFX9-NEXT: v_add_u32_e32 v34, 0x7fff, v34 -; GFX9-NEXT: v_or_b32_e32 v35, 0x400000, v17 -; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v17, v17 -; GFX9-NEXT: v_cndmask_b32_e32 v17, v34, v35, vcc -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v33 -; GFX9-NEXT: v_and_b32_sdwa v17, v18, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NEXT: v_lshl_or_b32 v17, v33, 16, v17 -; GFX9-NEXT: v_and_b32_e32 v33, 0xffff0000, v16 -; GFX9-NEXT: v_add_f32_e32 v33, 0x40c00000, v33 -; GFX9-NEXT: v_bfe_u32 v34, v33, 16, 1 -; GFX9-NEXT: v_add_u32_e32 v34, v34, v33 -; GFX9-NEXT: v_lshlrev_b32_e32 v16, 16, v16 -; GFX9-NEXT: v_add_u32_e32 v34, 0x7fff, v34 -; GFX9-NEXT: v_or_b32_e32 v35, 0x400000, v33 -; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v33, v33 -; GFX9-NEXT: v_add_f32_e32 v16, 0x40c00000, v16 -; GFX9-NEXT: v_cndmask_b32_e32 v33, v34, v35, vcc -; GFX9-NEXT: v_bfe_u32 v34, v16, 16, 1 -; GFX9-NEXT: v_add_u32_e32 v34, v34, v16 -; GFX9-NEXT: v_add_u32_e32 v34, 0x7fff, v34 -; GFX9-NEXT: v_or_b32_e32 v35, 0x400000, v16 -; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v16, v16 -; GFX9-NEXT: v_cndmask_b32_e32 v16, v34, v35, vcc -; GFX9-NEXT: v_and_b32_sdwa v16, v18, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NEXT: v_lshrrev_b32_e32 v18, 16, v33 -; GFX9-NEXT: v_lshl_or_b32 v16, v18, 16, v16 -; GFX9-NEXT: .LBB63_3: ; %end -; GFX9-NEXT: v_mov_b32_e32 v18, v32 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; GFX9-NEXT: .LBB63_4: -; GFX9-NEXT: s_branch .LBB63_2 -; -; GFX11-TRUE16-LABEL: bitcast_v64bf16_to_v16i64_scalar: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:156 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:28 -; GFX11-TRUE16-NEXT: s_clause 0x6 ; 28-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v167, v13 :: v_dual_mov_b32 v176, v12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v177, v11 :: v_dual_mov_b32 v178, v10 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v179, v9 :: v_dual_mov_b32 v180, v8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v181, v7 :: v_dual_mov_b32 v182, v6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v183, v5 :: v_dual_mov_b32 v168, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v169, v3 :: v_dual_mov_b32 v170, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v171, v1 :: v_dual_mov_b32 v172, v0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v174, s28 :: v_dual_mov_b32 v173, s29 -; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 -; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB63_4 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v135, s0 :: v_dual_mov_b32 v134, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v132, s2 :: v_dual_mov_b32 v129, s3 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v125, s16 :: v_dual_mov_b32 v120, s17 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v114, s18 :: v_dual_mov_b32 v107, s19 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v99, s20 :: v_dual_mov_b32 v90, s21 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v80, s22 :: v_dual_mov_b32 v69, s23 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v57, s24 :: v_dual_mov_b32 v44, s25 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s26 :: v_dual_mov_b32 v15, s27 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB63_3 -; GFX11-TRUE16-NEXT: .LBB63_2: ; %cmp.true -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s27, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s27, 0xffff0000 -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s5 -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s26, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s26, 16 -; GFX11-TRUE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: v_bfe_u32 v4, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_bfe_u32 v7, v3, 16, 1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v4, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s25, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s25, 16 -; GFX11-TRUE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x7fff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s5 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s24, 0xffff0000 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v2, v8 :: v_dual_add_nc_u32 v7, v7, v3 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v3 -; GFX11-TRUE16-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v4, v9, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s7 -; GFX11-TRUE16-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v15, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, 0x400000, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x7fff, v7 -; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v4, 16, 1 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s24, 16 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v2, v7, v2 :: v_dual_add_nc_u32 v7, v8, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, v9, v6 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x7fff, v8 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v6, v7, v8, vcc_lo -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc_lo -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v6 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v1.l -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc_lo -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v44, 16, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v44.h, v4.l -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s23, 0xffff0000 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v57, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v57.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s23, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s22, 0xffff0000 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v69, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v69.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s22, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s21, 0xffff0000 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v80, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v80.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s21, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s20, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v90, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v90.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s20, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s19, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v99, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v99.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s19, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s18, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v107, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v107.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s17, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v114, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v114.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s17, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s16, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v120, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v120.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s16, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s3, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v125, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v125.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s2, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v129, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v129.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s3 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s2 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s1, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v132, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v132.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s2 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s0, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s0, s0, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v134, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v134.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s1 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s0 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v135, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v135.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v167 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v167 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v167, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v167.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v176 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v176 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v176, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v176.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v177 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-NEXT: v_add_f32_e32 v33, 0x40c00000, v33 +; GFX9-NEXT: v_bfe_u32 v34, v33, 16, 1 +; GFX9-NEXT: v_add_u32_e32 v34, v34, v33 +; GFX9-NEXT: v_lshlrev_b32_e32 v17, 16, v17 +; GFX9-NEXT: v_add_u32_e32 v34, 0x7fff, v34 +; GFX9-NEXT: v_or_b32_e32 v35, 0x400000, v33 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v33, v33 +; GFX9-NEXT: v_add_f32_e32 v17, 0x40c00000, v17 +; GFX9-NEXT: v_cndmask_b32_e32 v33, v34, v35, vcc +; GFX9-NEXT: v_bfe_u32 v34, v17, 16, 1 +; GFX9-NEXT: v_add_u32_e32 v34, v34, v17 +; GFX9-NEXT: v_add_u32_e32 v34, 0x7fff, v34 +; GFX9-NEXT: v_or_b32_e32 v35, 0x400000, v17 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v17, v17 +; GFX9-NEXT: v_cndmask_b32_e32 v17, v34, v35, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX9-NEXT: v_and_b32_sdwa v17, v18, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: v_lshl_or_b32 v17, v33, 16, v17 +; GFX9-NEXT: v_and_b32_e32 v33, 0xffff0000, v16 +; GFX9-NEXT: v_add_f32_e32 v33, 0x40c00000, v33 +; GFX9-NEXT: v_bfe_u32 v34, v33, 16, 1 +; GFX9-NEXT: v_add_u32_e32 v34, v34, v33 +; GFX9-NEXT: v_lshlrev_b32_e32 v16, 16, v16 +; GFX9-NEXT: v_add_u32_e32 v34, 0x7fff, v34 +; GFX9-NEXT: v_or_b32_e32 v35, 0x400000, v33 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v33, v33 +; GFX9-NEXT: v_add_f32_e32 v16, 0x40c00000, v16 +; GFX9-NEXT: v_cndmask_b32_e32 v33, v34, v35, vcc +; GFX9-NEXT: v_bfe_u32 v34, v16, 16, 1 +; GFX9-NEXT: v_add_u32_e32 v34, v34, v16 +; GFX9-NEXT: v_add_u32_e32 v34, 0x7fff, v34 +; GFX9-NEXT: v_or_b32_e32 v35, 0x400000, v16 +; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v16, v16 +; GFX9-NEXT: v_cndmask_b32_e32 v16, v34, v35, vcc +; GFX9-NEXT: v_and_b32_sdwa v16, v18, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v18, 16, v33 +; GFX9-NEXT: v_lshl_or_b32 v16, v18, 16, v16 +; GFX9-NEXT: .LBB63_3: ; %end +; GFX9-NEXT: v_mov_b32_e32 v18, v32 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-NEXT: .LBB63_4: +; GFX9-NEXT: s_branch .LBB63_2 +; +; GFX11-TRUE16-LABEL: bitcast_v64bf16_to_v16i64_scalar: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v13 :: v_dual_mov_b32 v30, v12 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29 +; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 +; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo +; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB63_4 +; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v7, s19 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 +; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB63_3 +; GFX11-TRUE16-NEXT: .LBB63_2: ; %cmp.true +; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s27, 16 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s27, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s5 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s26, 0xffff0000 +; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s26, 16 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v1, 16, 1 +; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v0, 16, 1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v0 ; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v177 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, 0x400000, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, v5, v1 +; GFX11-TRUE16-NEXT: v_bfe_u32 v8, v3, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s6 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, 0x400000, v3 +; GFX11-TRUE16-NEXT: s_and_b32 s5, s25, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 0x7fff, v5 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v0 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s5 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s25, 16 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v177, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v177.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v178 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v178 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v2, v7, vcc_lo ; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v178, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v178.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v179 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v179 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 +; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v4, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v5, v9, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, v8, v3 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v6, 16, 1 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v15, 16, v1 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x7fff, v8 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v5, v6 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s24, 0xffff0000 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, v7, v10, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v179, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v179.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v180 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v180 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v6 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s24, 16 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v7, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v9, v5 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v7 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v14, 16, v1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v3, v7 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v5 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s23, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v4, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s23, 16 +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v8, 16, 1 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v13, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s22, 0xffff0000 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v5, v8 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v180, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v180.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v181 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v181 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v9, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v7, v3, 16, 1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v8 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s22, 16 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v9, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v7, v3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v9 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v12, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v5, v9 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v0.l +; GFX11-TRUE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s21, 0xffff0000 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v181, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v181.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v182 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v182 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-TRUE16-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s21, 16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v3 +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v6, 16, 1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v182, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v182.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v183 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v183 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v183, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v183.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v168 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v168 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v8, v5 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v11, 16, v1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v3, v6 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v5 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s20, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v4, 16, 1 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s20, 16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v6 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v8, 16, 1 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v10, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s19, 0xffff0000 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v5, v8 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v168, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v168.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v169 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v169 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v7, v3, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v8 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v7, v3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v32 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v32, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s19, 16 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v9, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v5, v32 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v0.l +; GFX11-TRUE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s18, 0xffff0000 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v169, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v169.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v170 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v170 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-TRUE16-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s18, 16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v3 +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v6, 16, 1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v8, v5 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v8, 16, v1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v3, v6 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v5 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s17, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v6 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s17, 16 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v32, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v4, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v32 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v5, v32 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v4 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v170, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v170.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v171 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v3, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s16, 0xffff0000 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v171 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v33, v3 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v34, 16, 1 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v5, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v3 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s3, 0xffff0000 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v171, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v171.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v172 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v172 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_add_f32_e64 v33, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v32, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 16 +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v33, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s3 +; GFX11-TRUE16-NEXT: s_and_b32 s3, s2, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v35, v32 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v35, 0x40c00000, s3 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v3, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v32 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v33 +; GFX11-TRUE16-NEXT: v_bfe_u32 v32, v35, 16, 1 +; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s2, 16 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v33, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v34 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v32, v35 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s2 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v34 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v172, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v172.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v173 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: s_and_b32 s2, s1, 0xffff0000 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v173 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_add_f32_e64 v37, 0x40c00000, s2 +; GFX11-TRUE16-NEXT: v_bfe_u32 v36, v32, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v37, 16, 1 +; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 16 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v33, vcc_lo +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v36, v32 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v34, v37 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s1 +; GFX11-TRUE16-NEXT: s_and_b32 s1, s0, 0xffff0000 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v0.l +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v33, 0x400000, v32 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v35, 0x40c00000, s1 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v173, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v173.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v174 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v33, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v32, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-TRUE16-NEXT: s_lshl_b32 s0, s0, 16 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v2, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, v38, v34 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v32, v35 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v33, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v31 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v36 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v36, 0x40c00000, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v33, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v36, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v35, 0x400000, v36 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v0, v37 :: v_dual_add_nc_u32 v33, v33, v36 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v32.l +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v30 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v0 +; GFX11-TRUE16-NEXT: v_bfe_u32 v0, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v30, 16, v30 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v33, v33, v35 :: v_dual_add_nc_u32 v0, v0, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v31, 16, v31 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v33 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v32.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v34, v36, v37 :: v_dual_add_f32 v31, 0x40c00000, v31 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v30, 0x40c00000, v30 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v33, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v31, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v31, v31 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, v38, v31 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v29 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 16, v29 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v36 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v31 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v29, 0x40c00000, v29 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v31, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v30, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v30 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v34, v30 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_and_b32 v37, 0xffff0000, v28 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.h, v33.l +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v30, v30 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v28, 16, v28 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v30, v34, v36 :: v_dual_add_nc_u32 v33, v33, v35 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v29, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v30 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_dual_add_f32 v28, 0x40c00000, v28 :: v_dual_add_nc_u32 v35, v37, v29 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v32.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v33, v33, v36 :: v_dual_add_nc_u32 v32, v38, v34 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v29 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v29, v29 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v27 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v29, v35, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v28, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v28 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v29 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_add_nc_u32 v35, v35, v28 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v26 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v26, 16, v26 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 16, v27 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v33.l +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v28, v28 +; GFX11-TRUE16-NEXT: v_dual_add_f32 v26, 0x40c00000, v26 :: v_dual_add_f32 v27, 0x40c00000, v27 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v33, v34 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v28, v35, v36 :: v_dual_add_f32 v35, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v27, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v37, v27 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v28 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v32.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v27 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v27, v27 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v25 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v27, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v26, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v26 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v34, v26 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v27 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_and_b32 v37, 0xffff0000, v24 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v24, 16, v24 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 16, v25 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v33.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v26, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_dual_add_f32 v24, 0x40c00000, v24 :: v_dual_add_f32 v25, 0x40c00000, v25 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v26 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v32.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v25, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v34 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v23 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v23, 16, v23 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v33, v35 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v37, v25 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v23, 0x40c00000, v23 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v25 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v25, v35, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v24, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v24 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v25 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v33.l +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_and_b32 v37, 0xffff0000, v22 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v22, 16, v22 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v35, v24 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v22, 0x40c00000, v22 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v174 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v24, v35, v36 :: v_dual_add_nc_u32 v33, v33, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v23, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v37, v23 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v24 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v32.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v23 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v23, v23 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v21 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v23, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v22, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v22 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v34, v22 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v23 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_and_b32 v37, 0xffff0000, v20 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v22, v22 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v33.l +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v20, 16, v20 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v22, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v33, v35 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v22, 16, v22 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v32.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v20, 0x40c00000, v20 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v34 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v19 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v21, 16, v21 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v19, 16, v19 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_f32_e32 v21, 0x40c00000, v21 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_add_f32_e32 v19, 0x40c00000, v19 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v21, 16, 1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v21 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v21, v21 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v37, v21 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v174, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v174.h, v0.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v21, v35, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v20, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v20 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v21, 16, v21 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_add_nc_u32 v35, v35, v20 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v33.l +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v18 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v20, v20 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v18, 16, v18 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v33, v34 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v20, v35, v36 :: v_dual_add_f32 v35, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v19, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v37, v19 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v18, 0x40c00000, v18 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v20, 16, v20 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v19 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v32.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v35 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v17 +; GFX11-TRUE16-NEXT: v_bfe_u32 v39, v18, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v19, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v38, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v39, v18 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v19, 16, v19 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v33.l +; GFX11-TRUE16-NEXT: v_bfe_u32 v36, v34, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v38 :: v_dual_add_nc_u32 v33, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v35, 0x400000, v18 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 16, v17 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v16 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v18, v18 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, v36, v34 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_add_f32 v17, 0x40c00000, v17 :: v_dual_lshlrev_b32 v16, 16, v16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v18, v33, v35 :: v_dual_add_f32 v33, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v36 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v17, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v33, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v16, 0x40c00000, v16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v39, 0x400000, v17 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v18, 16, v18 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v34, v35, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, v37, v17 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v37, v38, v33 +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v16, 16, 1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v38, 0x400000, v33 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v37, 0x7fff, v37 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v35, v16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v48, 0x400000, v16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v32.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v37, v38, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v17, v17 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v17, v36, v39, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v16, v16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v17, 16, v17 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v16, v35, v48, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v34.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v16, 16, v16 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v33.l ; GFX11-TRUE16-NEXT: .LBB63_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, v125 :: v_dual_mov_b32 v5, v120 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, v114 :: v_dual_mov_b32 v7, v107 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, v99 :: v_dual_mov_b32 v9, v90 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, v57 :: v_dual_mov_b32 v13, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v30 :: v_dual_mov_b32 v17, v173 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, v174 :: v_dual_mov_b32 v19, v171 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v172 :: v_dual_mov_b32 v21, v169 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, v170 :: v_dual_mov_b32 v23, v183 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v22, v168 :: v_dual_mov_b32 v25, v181 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0x6 ; 28-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:280 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, v135 :: v_dual_mov_b32 v1, v134 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, v132 :: v_dual_mov_b32 v3, v129 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, v80 :: v_dual_mov_b32 v11, v69 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, v182 :: v_dual_mov_b32 v27, v179 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v180 :: v_dual_mov_b32 v29, v177 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v178 :: v_dual_mov_b32 v31, v167 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v30, v176 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB63_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 ; GFX11-TRUE16-NEXT: s_branch .LBB63_2 ; ; GFX11-FAKE16-LABEL: bitcast_v64bf16_to_v16i64_scalar: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 -; GFX11-FAKE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:288 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:284 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:280 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:276 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:272 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:268 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:264 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:260 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:256 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:252 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:248 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:244 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:240 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:236 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:232 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:228 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:224 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:220 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:216 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:212 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:208 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:204 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:200 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:196 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:192 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:188 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:184 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:180 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:176 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:172 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:168 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:164 -; GFX11-FAKE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:160 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:156 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:152 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:148 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:144 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:140 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:136 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:132 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:128 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:124 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:120 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:116 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:112 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:108 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:104 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:100 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:96 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:92 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:88 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v139, s32 offset:84 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v140, s32 offset:80 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v141, s32 offset:76 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v142, s32 offset:72 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v143, s32 offset:68 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v152, s32 offset:64 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v153, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v154, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v155, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v156, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v157, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v158, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v159, s32 offset:36 -; GFX11-FAKE16-NEXT: s_clause 0x8 ; 36-byte Folded Spill -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v168, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v169, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v170, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v171, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v172, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v173, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v174, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v175, s32 offset:4 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v184, s32 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v178, v13 :: v_dual_mov_b32 v179, v12 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v180, v11 :: v_dual_mov_b32 v181, v9 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v182, v10 :: v_dual_mov_b32 v169, v7 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v170, v8 :: v_dual_mov_b32 v177, v3 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v176, v6 :: v_dual_mov_b32 v171, v4 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v174, v5 :: v_dual_mov_b32 v173, v0 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v184, v2 :: v_dual_mov_b32 v175, v1 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v183, s28 :: v_dual_mov_b32 v172, s29 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v31, v13 :: v_dual_mov_b32 v30, v12 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29 ; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 ; GFX11-FAKE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo ; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB63_4 ; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, s0 :: v_dual_mov_b32 v37, s2 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, s1 :: v_dual_mov_b32 v41, s3 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v46, s16 :: v_dual_mov_b32 v59, s18 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v52, s17 :: v_dual_mov_b32 v67, s19 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v76, s20 :: v_dual_mov_b32 v97, s22 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v86, s21 :: v_dual_mov_b32 v109, s23 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v122, s24 :: v_dual_mov_b32 v151, s26 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v136, s25 :: v_dual_mov_b32 v15, s27 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v7, s19 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27 ; GFX11-FAKE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB63_3 ; GFX11-FAKE16-NEXT: .LBB63_2: ; %cmp.true @@ -104143,762 +102163,674 @@ define inreg <16 x i64> @bitcast_v64bf16_to_v16i64_scalar(<64 x bfloat> inreg %a ; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s27, 16 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s5 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s26, 16 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s26, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s6 +; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s26, 16 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 ; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v1, 16, 1 ; GFX11-FAKE16-NEXT: v_bfe_u32 v2, v0, 16, 1 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s6 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, 0x400000, v1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, 0x400000, v0 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v9, v3, 16, 1 ; GFX11-FAKE16-NEXT: v_bfe_u32 v10, v5, 16, 1 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-FAKE16-NEXT: v_bfe_u32 v9, v3, 16, 1 -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s25, 16 +; GFX11-FAKE16-NEXT: s_and_b32 s5, s25, 0xffff0000 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x7fff, v4 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, v2, v0 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s25, 0xffff0000 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s24, 0xffff0000 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v51, 0xffff0000, v183 +; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s25, 16 +; GFX11-FAKE16-NEXT: s_and_b32 s4, s24, 0xffff0000 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s24, 16 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v8, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, v9, v3 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, v10, v5 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, v9, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v3 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: v_bfe_u32 v10, v6, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v4, v7, vcc_lo +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v1, v4, v7 :: v_dual_add_nc_u32 v2, 0x7fff, v2 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v5 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x7fff, v8 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 +; GFX11-FAKE16-NEXT: v_bfe_u32 v5, v6, 16, 1 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 -; GFX11-FAKE16-NEXT: v_bfe_u32 v3, v8, 16, 1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v15, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, v3, v8 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v10, v6 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v8, 16, 1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v15, v1, 16, v0 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v4 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v5, v6 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v4, v7, v9, vcc_lo -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v7, v8 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s23, 16 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v14, v0, 16, v1 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s24, 16 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v6 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, 0x400000, v8 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v9, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_bfe_u32 v8, v7, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s23, 0xffff0000 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo ; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, 0x400000, v9 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v8, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v6, v1, v7 :: v_dual_and_b32 v1, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v9, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v3 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x7fff, v4 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v6 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v7, v9 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s23, 16 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v151, v0, 16, v1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v12, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x7fff, v6 -; GFX11-FAKE16-NEXT: v_bfe_u32 v11, v7, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v5, v4, v8, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s22, 0xffff0000 -; GFX11-FAKE16-NEXT: v_bfe_u32 v9, v12, 16, 1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, v11, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xffff, v3 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc_lo -; GFX11-FAKE16-NEXT: v_add_f32_e64 v10, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s22, 16 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v5 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v11, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x7fff, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, v9, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v7 -; GFX11-FAKE16-NEXT: v_bfe_u32 v14, v10, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, 0x400000, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 0x7fff, v8 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s22, 16 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v6 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v8, 16, 1 +; GFX11-FAKE16-NEXT: s_and_b32 s4, s22, 0xffff0000 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v6, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v7, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v6 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v13, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_bfe_u32 v8, v7, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s21, 0xffff0000 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v7, v6, v9, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v9, v11, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v12, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, v14, v10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff, v5 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v7 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v8, v8, v13 :: v_dual_add_nc_u32 v7, v9, v11 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 0x7fff, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, 0x400000, v10 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v13, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s21, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x7fff, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, 0x400000, v11 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v16, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v10, v13, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v8 -; GFX11-FAKE16-NEXT: v_bfe_u32 v12, v16, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v8, v7 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s21, 16 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v12, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v6 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v8, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s20, 0xffff0000 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v11, v7, v14 :: v_dual_add_nc_u32 v10, v10, v13 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff, v8 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v9 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, 0x400000, v13 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 0x7fff, v10 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, v12, v16 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v12, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v13, v13 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s20, 16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, 0x400000, v16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x7fff, v11 -; GFX11-FAKE16-NEXT: v_bfe_u32 v18, v12, 16, 1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v19, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v13, v10, v14, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v16, v16 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s20, 16 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v6, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v7, v8 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v11, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v6 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_bfe_u32 v8, v7, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s19, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, v18, v12 -; GFX11-FAKE16-NEXT: v_bfe_u32 v16, v19, 16, 1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xffff, v9 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v11, v11, v17, vcc_lo -; GFX11-FAKE16-NEXT: v_add_f32_e64 v17, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s19, 16 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v13 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 0x7fff, v14 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, v16, v19 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, 0x400000, v12 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v18, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_bfe_u32 v21, v17, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v12, v12 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v11, 16, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, 0x7fff, v14 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v20, 0x400000, v19 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v8, v7 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s19, 16 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v10, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v6 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v8, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s18, 0xffff0000 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v13, v13, v16, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v16, v18, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v19, v21, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v11, 16, v13 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v14, v14, v20 :: v_dual_add_nc_u32 v13, v16, v18 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 0x7fff, v19 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v19, 0x400000, v17 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v20, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v17, v17 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s18, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 0x7fff, v13 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v21, 0x400000, v18 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v22, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v16, v16, v19, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v17, v20, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v18, v18 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v14 -; GFX11-FAKE16-NEXT: v_bfe_u32 v19, v22, 16, 1 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s18, 16 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v6, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v7, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v6 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v9, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v32, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_bfe_u32 v8, v7, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s17, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, v17, v20 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v18, v13, v21 :: v_dual_and_b32 v13, 0xffff, v14 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v21, 0x400000, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x7fff, v17 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v16, 16, v18 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, v19, v22 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v19, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v20, v20 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s17, 16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v23, 0x400000, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, 0x7fff, v18 -; GFX11-FAKE16-NEXT: v_bfe_u32 v24, v19, 16, 1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v25, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v20, v17, v21, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v22, v22 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s17, 16 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v33, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v32, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v8, v7 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v8, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v6 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s5 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s16, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, v24, v19 -; GFX11-FAKE16-NEXT: v_bfe_u32 v22, v25, 16, 1 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v18, v18, v23 :: v_dual_and_b32 v17, 0xffff, v16 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v23, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s16, 16 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v16, 16, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 0x7fff, v21 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, v22, v25 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, 0x400000, v19 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v24, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_bfe_u32 v27, v23, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v18, 16, v18 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, 0x7fff, v21 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v26, 0x400000, v25 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s16, 16 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v6, 16, 1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v32, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v34, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v6 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v2, v2, v33 :: v_dual_add_nc_u32 v5, v7, v32 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v7, v0, 16, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v32 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v33, 0x40c00000, s5 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s3, 0xffff0000 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v20, v20, v22, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v22, v24, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v25, v27, v23 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v18 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v18, 16, v20 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v21, v21, v26 :: v_dual_add_nc_u32 v20, v22, v24 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x7fff, v25 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v25, 0x400000, v23 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v26, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v23, v23 ; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 0x7fff, v20 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, 0x400000, v24 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v28, 0x40c00000, s3 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v22, v22, v25, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v23, v26, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v21, 16, v21 -; GFX11-FAKE16-NEXT: v_bfe_u32 v25, v28, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v33, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v6, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v34, vcc_lo +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s3 ; GFX11-FAKE16-NEXT: s_and_b32 s3, s2, 0xffff0000 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v24, v20, v27 :: v_dual_add_nc_u32 v23, v23, v26 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v20, 0xffff, v21 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v21, 16, v22 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, 0x400000, v26 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v23, 0x7fff, v23 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v22, 16, v24 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v24, v25, v28 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v25, 0x40c00000, s3 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v32, v33 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v34, 16, 1 ; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v29, 0x400000, v28 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v24, 0x7fff, v24 -; GFX11-FAKE16-NEXT: v_bfe_u32 v30, v25, 16, 1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v31, 0x40c00000, s2 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v26, v23, v27, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v28, v28 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v33, v34 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v32, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v5, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v32 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v32 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v35, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e64 v35, 0x40c00000, s2 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v34 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v33, 0x40c00000, s3 +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s2, s1, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, v30, v25 -; GFX11-FAKE16-NEXT: v_bfe_u32 v28, v31, 16, 1 ; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v24, v24, v29, vcc_lo -; GFX11-FAKE16-NEXT: v_add_f32_e64 v29, 0x40c00000, s2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v23, 0xffff, v22 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v22, 16, v26 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v26, 0x7fff, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, v28, v31 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v28, 0x400000, v25 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v30, 0x40c00000, s1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v29, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v24, 16, v24 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 0x7fff, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v32, 0x400000, v31 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s0, 0xffff0000 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v26, v26, v28, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v28, v30, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v31, v31 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v33, v29 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v25, 0xffff, v24 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v24, 16, v26 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v27, v27, v32 :: v_dual_add_nc_u32 v26, v28, v30 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v33, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v35 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v33 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, 0x7fff, v31 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v31, 0x400000, v29 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v29, v29 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v4, v33 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v36, vcc_lo +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v4, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v36, 0x40c00000, s1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v32, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s2 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v36, 16, 1 +; GFX11-FAKE16-NEXT: s_and_b32 s1, s0, 0xffff0000 ; GFX11-FAKE16-NEXT: s_lshl_b32 s0, s0, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v26, 0x7fff, v26 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v33, 0x400000, v30 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s0 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v28, v28, v31, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v29, v32, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v30, v30 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v31, v34, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v27, 16, v27 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v30, v26, v33, vcc_lo -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v26, 16, v28 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, v29, v32 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v33, 16, v178 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v30 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v30, v31, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff0000, v178 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, 0x7fff, v28 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v33, 0x40c00000, v33 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v109, v5, 16, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v30, 0x7fff, v30 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v31, 0x40c00000, v31 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v28, v35, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v33, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v32, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v34, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v36 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v38, 0x40c00000, s0 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v32, v34 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v3, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v33 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v33, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v35 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v35, 0x40c00000, s1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v38, 16, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v32, v33, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v28, 0xffff, v29 +; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v2, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v2, v0, 16, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, v33, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v36, v38 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v38 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v34 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff0000, v31 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v31, 16, v31 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v32 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_dual_add_f32 v34, 0x40c00000, v34 :: v_dual_add_f32 v31, 0x40c00000, v31 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v34, 16, 1 ; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v31, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v37, v33 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v30, v30, v36, vcc_lo -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v36, 16, v179 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v35, v31 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v179 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v31 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v1, v1, 16, v32 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v36, v34 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v31 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v30 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v30, 16, v30 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v0, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v31 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 ; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v32, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e32 v30, 0x40c00000, v30 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v31, v31 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v180 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v31, v34, v38, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v36, 16, 1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v38, 16, v180 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_add_f32 v37, 0x40c00000, v37 :: v_dual_add_nc_u32 v34, v34, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v33, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v38 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v33 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v34 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v31, v35, v37 :: v_dual_add_nc_u32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v30, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, v33 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v35, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v29 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v30 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v32, v32, v38 :: v_dual_lshlrev_b32 v29, 16, v29 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v30 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_f32_e32 v29, 0x40c00000, v29 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v30, v30 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v37, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v30, v34, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v29, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v31, v32, 16, v31 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v30, 16, v30 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v178, v31, 16, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v36, v37 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v33, v33, v48 :: v_dual_lshlrev_b32 v36, 16, v182 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v33, v33, v38 :: v_dual_add_nc_u32 v32, v34, v35 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v35, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v182 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v34 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x7fff, v31 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v179, v32, 16, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v30, 0xffff, v30 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v136, v2, 16, v4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v36, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v36, v29 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v28 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v28 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v29 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v29, v29 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v31, v31, v48 :: v_dual_add_nc_u32 v38, v38, v35 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v37, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v181 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v38 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v38, 16, v181 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v37, 0x40c00000, v37 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v38 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v32, v39, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e32 v28, 0x40c00000, v28 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v30, 0xffff, v30 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v29, v34, v37, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v33 -; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v37, 16, 1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v48, vcc_lo -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v180, v31, 16, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v35, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v170 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v36, v38 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_add_f32 v35, 0x40c00000, v35 :: v_dual_lshlrev_b32 v36, 16, v170 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v28, 16, 1 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v30, v33, 16, v30 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v28 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v32, v32, v38 :: v_dual_add_nc_u32 v33, v35, v28 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v27 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v27 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v36, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v182, v31, 16, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, v38, v35 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v39, v36 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v48, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v28, v28 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v29 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v27, 0x40c00000, v27 :: v_dual_cndmask_b32 v28, v33, v37 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x7fff, v31 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v29, 0xffff, v29 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v27, 16, 1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v28, 16, v28 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v34, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v29, v32, 16, v29 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v27 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v27, v27 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v169 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v31, v31, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v34, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v36, v27 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v26 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v26 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v28, 0xffff, v28 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v26, 0x40c00000, v26 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v28, v32, 16, v28 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v27, v34, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v26, 16, 1 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v39, 16, v169 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v181, v32, 16, v33 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v37, 0x40c00000, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v176 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v48, vcc_lo -; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v39 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 -; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v37, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v34 -; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v36, 16, 1 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v38 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v35, v37 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v35, 16, v176 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v32, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v25 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v27, 16, v27 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v36 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v37, v26 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v26 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v49, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v25 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v26, v32, v37, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v27, 0xffff, v27 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v170, v33, 16, v31 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v49, v35 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v32, v32, v48 :: v_dual_add_nc_u32 v33, v37, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v174 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v25, 0x40c00000, v25 :: v_dual_lshlrev_b32 v36, 16, v24 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff0000, v24 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v34, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v27, v33, 16, v27 +; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v25, 16, 1 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v35 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v33, v25 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v26, 16, v26 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v24, 0x40c00000, v24 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v25 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v36, 16, 1 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v26, 0xffff, v26 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v24, 16, 1 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v23 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v23 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v25, v33, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v39, v36 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v26, v32, 16, v26 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v34 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v35, v24 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v35, 0x40c00000, v37 :: v_dual_cndmask_b32 v34, v34, v36 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v36, 16, v174 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v32 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v36, 0x40c00000, v36 :: v_dual_cndmask_b32 v33, v33, v39 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v169, v31, 16, v32 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, v37, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v34 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v31, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v25, 16, v25 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v24 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v23, 0x40c00000, v23 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v35, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v37 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v25, 0xffff, v25 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v22 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff0000, v171 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v22 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v24, v34, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v25, v32, 16, v25 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v33 +; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v23, 16, 1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v24, 16, v24 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v35 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v38, 16, v177 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v31, v36 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v176, v33, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v37 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v32, v23 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v24, v24, 16, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v34 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v34, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v32, 0x40c00000, v32 :: v_dual_lshlrev_b32 v37, 16, v171 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x7fff, v31 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v38 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v22, 0x40c00000, v22 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v36 ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v34, vcc_lo -; GFX11-FAKE16-NEXT: v_or_b32_e32 v34, 0x400000, v36 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v37 -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v32, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v50, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v31, v31, v34 :: v_dual_add_nc_u32 v36, v37, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v177 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v37, 0x40c00000, v37 :: v_dual_add_nc_u32 v34, v34, v35 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v34, 0x400000, v23 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v23, v23 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v22 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v36, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v23, v32, v34 :: v_dual_add_nc_u32 v34, v35, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v21 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 16, v21 +; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v22, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 -; GFX11-FAKE16-NEXT: v_bfe_u32 v49, v37, 16, 1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v50, v38 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v50, 16, v184 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_dual_add_f32 v21, 0x40c00000, v21 :: v_dual_add_nc_u32 v32, v32, v22 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v23, 16, v23 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v37, vcc_lo ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v34, v34, v48 :: v_dual_add_nc_u32 v35, v49, v37 +; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v48, v21, 16, 1 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v22, v22 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v21 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v23, 0xffff, v23 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v48, v21 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v22, v32, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v39, v35 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v20 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v48, 0xffff0000, v184 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v20, 16, v20 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v21, v21 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v22, 16, v22 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v20, 0x40c00000, v20 :: v_dual_cndmask_b32 v21, v36, v37 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v38 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xffff, v22 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v49, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v36, v36, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v37, 0x40c00000, v50 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 +; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v20, 16, 1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v21, 16, v21 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v32, v39, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v23, v33, 16, v23 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v22, v34, 16, v22 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v21, 0xffff, v21 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v36 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v38, 0x40c00000, v48 :: v_dual_cndmask_b32 v35, v35, v49 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v36, 16, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v34 -; GFX11-FAKE16-NEXT: v_bfe_u32 v48, v37, 16, 1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v35, 16, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff, v36 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v174, v33, 16, v31 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v171, v32, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v48, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff0000, v175 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v34, 16, v175 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v39, v39, v38 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v177, v35, 16, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x7fff, v31 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v33, 0x40c00000, v33 :: v_dual_add_f32 v34, 0x40c00000, v34 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v39 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v38 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v37, v20 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff0000, v19 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v19, 16, v19 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v20, v20 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v21, v32, 16, v21 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v20 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v19, 0x40c00000, v19 :: v_dual_add_f32 v34, 0x40c00000, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v36 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v20, v33, v35 :: v_dual_and_b32 v33, 0xffff0000, v18 +; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v19, 16, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v34, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v33, 0x40c00000, v33 :: v_dual_lshlrev_b32 v18, 16, v18 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v38, v19 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_add_nc_u32 v35, v35, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v19 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v33, 16, 1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v34, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v31, v31, v35, vcc_lo -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v173 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v48, 16, v173 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v49, 0x400000, v33 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v35, 0x40c00000, v35 :: v_dual_cndmask_b32 v32, v32, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v37, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, v39, v34 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v34 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v18, 0x40c00000, v18 :: v_dual_add_nc_u32 v37, v37, v33 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v19, v36, v38, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, 0x7fff, v37 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v38, v38, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v122, v3, 16, v6 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v37, v39, vcc_lo +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v20, 16, v20 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v37 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v33 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v18 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v35, v39, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v18, 16, 1 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, 0x7fff, v38 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v48 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v33, v36, v49 :: v_dual_lshlrev_b32 v48, 16, v183 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v49, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v48, 0x40c00000, v48 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v35, v37, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v172 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v39, 16, v172 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v36, v38 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v55, 0x400000, v48 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v19, 16, v19 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v20, 0xffff, v20 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v18 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v36, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v17 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 16, v17 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v37, 16, v16 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v18, v18 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_dual_add_f32 v36, 0x40c00000, v36 :: v_dual_add_f32 v17, 0x40c00000, v17 ; GFX11-FAKE16-NEXT: v_add_f32_e32 v37, 0x40c00000, v37 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v39, 0x40c00000, v39 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v34 -; GFX11-FAKE16-NEXT: v_bfe_u32 v50, v37, 16, 1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v39, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v36, v36, v49, vcc_lo -; GFX11-FAKE16-NEXT: v_or_b32_e32 v54, 0x400000, v39 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v39, v39 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v50, 0x40c00000, v51 :: v_dual_add_nc_u32 v49, v50, v37 -; GFX11-FAKE16-NEXT: v_bfe_u32 v51, v48, 16, 1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v38, v38, v39 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v53, 0x400000, v37 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xffff0000, v16 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v18, v35, v38, vcc_lo ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v49, 0x7fff, v49 -; GFX11-FAKE16-NEXT: v_bfe_u32 v52, v50, 16, 1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v51, v51, v48 +; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v17, 16, 1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v50, 0x400000, v17 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v16, 0x40c00000, v16 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v17, v17 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v38, v39, v36 +; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v37, 16, 1 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v17 +; GFX11-FAKE16-NEXT: v_bfe_u32 v48, v16, 16, 1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v51, 0x400000, v37 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v38, 0x7fff, v38 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v36, 16, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v35, 16, v35 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v52, v52, v50 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v38, v38, v54 :: v_dual_add_nc_u32 v51, 0x7fff, v51 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v48, v48 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v39, 0x7fff, v52 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v52, 0x400000, v50 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v38, 16, v38 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v48, v51, v55, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v39, v39, v37 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v49, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v48, v48, v16 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v18, 16, v18 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v39, 0x7fff, v39 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v17, v35, v50, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v184, v32, 16, v31 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v175, v33, 16, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v38, 0xffff, v38 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v48, 16, v48 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v37, v49, v53, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v50, v50 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v173, v35, 16, v36 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v97, v8, 16, v10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v48, 0xffff, v48 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v37, 16, v37 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v39, v39, v52, vcc_lo -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v86, v9, 16, v12 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v76, v11, 16, v13 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v67, v14, 16, v17 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v172, v37, 16, v38 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v39, 16, v39 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v59, v16, 16, v19 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v52, v18, 16, v20 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v46, v21, 16, v23 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v41, v22, 16, v25 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v183, v39, 16, v48 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v37, v24, 16, v27 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v34, v26, 16, v28 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v32, v29, 16, v30 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v48 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v16 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v17, 16, v17 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v37, v39, v51, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v19 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v18, 0xffff, v18 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xffff, v17 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v36, v38, v49, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v16, v16 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v20, v32, 16, v20 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v19, v34, 16, v19 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v18, v33, 16, v18 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v36, 16, v36 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v16, v35, v48, vcc_lo +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v35, 16, v37 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v17, v36, 16, v17 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v16, 16, v16 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff, v35 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v16, v16, 16, v35 ; GFX11-FAKE16-NEXT: .LBB63_3: ; %end -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v3, v41 :: v_dual_mov_b32 v4, v46 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v6, v59 :: v_dual_mov_b32 v9, v86 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v7, v67 :: v_dual_mov_b32 v8, v76 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v10, v97 :: v_dual_mov_b32 v13, v136 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v11, v109 :: v_dual_mov_b32 v12, v122 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v14, v151 :: v_dual_mov_b32 v17, v172 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v18, v173 :: v_dual_mov_b32 v19, v175 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v20, v184 :: v_dual_mov_b32 v23, v174 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v22, v171 :: v_dual_mov_b32 v25, v169 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v26, v170 :: v_dual_mov_b32 v29, v180 -; GFX11-FAKE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-FAKE16-NEXT: scratch_load_b32 v184, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_b32 v175, off, s32 offset:4 -; GFX11-FAKE16-NEXT: scratch_load_b32 v174, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_b32 v173, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_b32 v172, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_b32 v171, off, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_load_b32 v170, off, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_load_b32 v169, off, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_load_b32 v168, off, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_load_b32 v159, off, s32 offset:36 -; GFX11-FAKE16-NEXT: scratch_load_b32 v158, off, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_load_b32 v157, off, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_load_b32 v156, off, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_load_b32 v155, off, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_load_b32 v154, off, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_load_b32 v153, off, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_load_b32 v152, off, s32 offset:64 -; GFX11-FAKE16-NEXT: scratch_load_b32 v143, off, s32 offset:68 -; GFX11-FAKE16-NEXT: scratch_load_b32 v142, off, s32 offset:72 -; GFX11-FAKE16-NEXT: scratch_load_b32 v141, off, s32 offset:76 -; GFX11-FAKE16-NEXT: scratch_load_b32 v140, off, s32 offset:80 -; GFX11-FAKE16-NEXT: scratch_load_b32 v139, off, s32 offset:84 -; GFX11-FAKE16-NEXT: scratch_load_b32 v138, off, s32 offset:88 -; GFX11-FAKE16-NEXT: scratch_load_b32 v137, off, s32 offset:92 -; GFX11-FAKE16-NEXT: scratch_load_b32 v136, off, s32 offset:96 -; GFX11-FAKE16-NEXT: scratch_load_b32 v127, off, s32 offset:100 -; GFX11-FAKE16-NEXT: scratch_load_b32 v126, off, s32 offset:104 -; GFX11-FAKE16-NEXT: scratch_load_b32 v125, off, s32 offset:108 -; GFX11-FAKE16-NEXT: scratch_load_b32 v124, off, s32 offset:112 -; GFX11-FAKE16-NEXT: scratch_load_b32 v123, off, s32 offset:116 -; GFX11-FAKE16-NEXT: scratch_load_b32 v122, off, s32 offset:120 -; GFX11-FAKE16-NEXT: scratch_load_b32 v121, off, s32 offset:124 -; GFX11-FAKE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-FAKE16-NEXT: scratch_load_b32 v120, off, s32 offset:128 -; GFX11-FAKE16-NEXT: scratch_load_b32 v111, off, s32 offset:132 -; GFX11-FAKE16-NEXT: scratch_load_b32 v110, off, s32 offset:136 -; GFX11-FAKE16-NEXT: scratch_load_b32 v109, off, s32 offset:140 -; GFX11-FAKE16-NEXT: scratch_load_b32 v108, off, s32 offset:144 -; GFX11-FAKE16-NEXT: scratch_load_b32 v107, off, s32 offset:148 -; GFX11-FAKE16-NEXT: scratch_load_b32 v106, off, s32 offset:152 -; GFX11-FAKE16-NEXT: scratch_load_b32 v105, off, s32 offset:156 -; GFX11-FAKE16-NEXT: scratch_load_b32 v104, off, s32 offset:160 -; GFX11-FAKE16-NEXT: scratch_load_b32 v95, off, s32 offset:164 -; GFX11-FAKE16-NEXT: scratch_load_b32 v94, off, s32 offset:168 -; GFX11-FAKE16-NEXT: scratch_load_b32 v93, off, s32 offset:172 -; GFX11-FAKE16-NEXT: scratch_load_b32 v92, off, s32 offset:176 -; GFX11-FAKE16-NEXT: scratch_load_b32 v91, off, s32 offset:180 -; GFX11-FAKE16-NEXT: scratch_load_b32 v90, off, s32 offset:184 -; GFX11-FAKE16-NEXT: scratch_load_b32 v89, off, s32 offset:188 -; GFX11-FAKE16-NEXT: scratch_load_b32 v88, off, s32 offset:192 -; GFX11-FAKE16-NEXT: scratch_load_b32 v79, off, s32 offset:196 -; GFX11-FAKE16-NEXT: scratch_load_b32 v78, off, s32 offset:200 -; GFX11-FAKE16-NEXT: scratch_load_b32 v77, off, s32 offset:204 -; GFX11-FAKE16-NEXT: scratch_load_b32 v76, off, s32 offset:208 -; GFX11-FAKE16-NEXT: scratch_load_b32 v75, off, s32 offset:212 -; GFX11-FAKE16-NEXT: scratch_load_b32 v74, off, s32 offset:216 -; GFX11-FAKE16-NEXT: scratch_load_b32 v73, off, s32 offset:220 -; GFX11-FAKE16-NEXT: scratch_load_b32 v72, off, s32 offset:224 -; GFX11-FAKE16-NEXT: scratch_load_b32 v63, off, s32 offset:228 -; GFX11-FAKE16-NEXT: scratch_load_b32 v62, off, s32 offset:232 -; GFX11-FAKE16-NEXT: scratch_load_b32 v61, off, s32 offset:236 -; GFX11-FAKE16-NEXT: scratch_load_b32 v60, off, s32 offset:240 -; GFX11-FAKE16-NEXT: scratch_load_b32 v59, off, s32 offset:244 -; GFX11-FAKE16-NEXT: scratch_load_b32 v58, off, s32 offset:248 -; GFX11-FAKE16-NEXT: scratch_load_b32 v57, off, s32 offset:252 -; GFX11-FAKE16-NEXT: s_clause 0x8 ; 36-byte Folded Reload -; GFX11-FAKE16-NEXT: scratch_load_b32 v56, off, s32 offset:256 -; GFX11-FAKE16-NEXT: scratch_load_b32 v47, off, s32 offset:260 -; GFX11-FAKE16-NEXT: scratch_load_b32 v46, off, s32 offset:264 -; GFX11-FAKE16-NEXT: scratch_load_b32 v45, off, s32 offset:268 -; GFX11-FAKE16-NEXT: scratch_load_b32 v44, off, s32 offset:272 -; GFX11-FAKE16-NEXT: scratch_load_b32 v43, off, s32 offset:276 -; GFX11-FAKE16-NEXT: scratch_load_b32 v42, off, s32 offset:280 -; GFX11-FAKE16-NEXT: scratch_load_b32 v41, off, s32 offset:284 -; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s32 offset:288 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, v32 :: v_dual_mov_b32 v1, v34 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, v37 :: v_dual_mov_b32 v5, v52 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v16, v183 :: v_dual_mov_b32 v21, v177 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v24, v176 :: v_dual_mov_b32 v27, v181 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v28, v182 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v30, v179 :: v_dual_mov_b32 v31, v178 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-FAKE16-NEXT: .LBB63_4: -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr15 ; GFX11-FAKE16-NEXT: s_branch .LBB63_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -108630,252 +106562,80 @@ define inreg <16 x i64> @bitcast_v64f16_to_v16i64_scalar(<64 x half> inreg %a, i ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 -; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 -; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 -; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 -; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 -; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 -; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 -; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 -; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 -; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 -; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 -; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 -; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 -; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 -; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 -; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 -; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 -; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 -; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 -; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 -; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 -; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 -; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 -; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 -; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 -; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 -; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 -; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 -; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 -; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 -; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 -; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 -; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 -; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 -; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 -; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 -; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 -; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 -; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 -; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 -; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 -; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 -; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 -; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 -; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 -; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 -; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 -; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 -; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 -; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 -; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 -; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 -; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 -; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 -; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 -; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 -; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 -; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 -; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 -; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 -; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 -; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 -; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 -; GFX11-NEXT: s_clause 0x9 ; 40-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 -; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 -; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 -; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 -; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 -; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 -; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 -; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 -; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 -; GFX11-NEXT: scratch_store_b32 off, v185, s32 -; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 -; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 -; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 -; GFX11-NEXT: v_dual_mov_b32 v182, v7 :: v_dual_mov_b32 v183, v6 -; GFX11-NEXT: v_dual_mov_b32 v170, v5 :: v_dual_mov_b32 v171, v4 -; GFX11-NEXT: v_dual_mov_b32 v172, v3 :: v_dual_mov_b32 v173, v2 -; GFX11-NEXT: v_dual_mov_b32 v174, v1 :: v_dual_mov_b32 v175, v0 -; GFX11-NEXT: v_dual_mov_b32 v184, s28 :: v_dual_mov_b32 v185, s29 +; GFX11-NEXT: v_dual_mov_b32 v31, v13 :: v_dual_mov_b32 v30, v12 +; GFX11-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 +; GFX11-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29 ; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo ; GFX11-NEXT: s_cbranch_scc0 .LBB67_4 ; GFX11-NEXT: ; %bb.1: ; %cmp.false -; GFX11-NEXT: v_dual_mov_b32 v47, s0 :: v_dual_mov_b32 v52, s2 -; GFX11-NEXT: v_dual_mov_b32 v49, s1 :: v_dual_mov_b32 v56, s3 -; GFX11-NEXT: v_dual_mov_b32 v61, s16 :: v_dual_mov_b32 v74, s18 -; GFX11-NEXT: v_dual_mov_b32 v67, s17 :: v_dual_mov_b32 v82, s19 -; GFX11-NEXT: v_dual_mov_b32 v91, s20 :: v_dual_mov_b32 v112, s22 -; GFX11-NEXT: v_dual_mov_b32 v101, s21 :: v_dual_mov_b32 v124, s23 -; GFX11-NEXT: v_dual_mov_b32 v137, s24 :: v_dual_mov_b32 v14, s26 -; GFX11-NEXT: v_dual_mov_b32 v151, s25 :: v_dual_mov_b32 v30, s27 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17 +; GFX11-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v7, s19 +; GFX11-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 +; GFX11-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23 +; GFX11-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25 +; GFX11-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27 ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 ; GFX11-NEXT: s_cbranch_vccnz .LBB67_3 ; GFX11-NEXT: .LBB67_2: ; %cmp.true -; GFX11-NEXT: v_pk_add_f16 v30, 0x200, s27 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v15, 0x200, s27 op_sel_hi:[0,1] ; GFX11-NEXT: v_pk_add_f16 v14, 0x200, s26 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v176, 0x200, v176 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v177, 0x200, v177 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v178, 0x200, v178 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v179, 0x200, v179 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v180, 0x200, v180 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v181, 0x200, v181 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v182, 0x200, v182 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v183, 0x200, v183 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v170, 0x200, v170 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v171, 0x200, v171 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v172, 0x200, v172 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v173, 0x200, v173 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v174, 0x200, v174 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v175, 0x200, v175 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v184, 0x200, v184 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v151, 0x200, s25 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v137, 0x200, s24 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v124, 0x200, s23 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v112, 0x200, s22 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v101, 0x200, s21 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v91, 0x200, s20 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v82, 0x200, s19 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v74, 0x200, s18 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v67, 0x200, s17 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v61, 0x200, s16 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v56, 0x200, s3 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v52, 0x200, s2 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v49, 0x200, s1 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v47, 0x200, s0 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v13, 0x200, s25 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v12, 0x200, s24 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v11, 0x200, s23 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v10, 0x200, s22 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v9, 0x200, s21 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v8, 0x200, s20 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v7, 0x200, s19 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v6, 0x200, s18 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v5, 0x200, s17 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v4, 0x200, s16 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v31, 0x200, v31 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v30, 0x200, v30 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v29, 0x200, v29 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v28, 0x200, v28 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v27, 0x200, v27 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v26, 0x200, v26 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v25, 0x200, v25 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v24, 0x200, v24 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v17, 0x200, v17 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v16, 0x200, v16 op_sel_hi:[0,1] ; GFX11-NEXT: .LBB67_3: ; %end -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_dual_mov_b32 v0, v47 :: v_dual_mov_b32 v1, v49 -; GFX11-NEXT: v_dual_mov_b32 v3, v56 :: v_dual_mov_b32 v4, v61 -; GFX11-NEXT: v_dual_mov_b32 v6, v74 :: v_dual_mov_b32 v9, v101 -; GFX11-NEXT: v_dual_mov_b32 v7, v82 :: v_dual_mov_b32 v8, v91 -; GFX11-NEXT: v_dual_mov_b32 v11, v124 :: v_dual_mov_b32 v12, v137 -; GFX11-NEXT: v_dual_mov_b32 v15, v30 :: v_dual_mov_b32 v16, v184 -; GFX11-NEXT: v_dual_mov_b32 v17, v185 :: v_dual_mov_b32 v18, v175 -; GFX11-NEXT: v_dual_mov_b32 v19, v174 :: v_dual_mov_b32 v20, v173 -; GFX11-NEXT: v_dual_mov_b32 v21, v172 :: v_dual_mov_b32 v22, v171 -; GFX11-NEXT: v_dual_mov_b32 v23, v170 :: v_dual_mov_b32 v24, v183 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v185, off, s32 -; GFX11-NEXT: scratch_load_b32 v184, off, s32 offset:4 -; GFX11-NEXT: scratch_load_b32 v175, off, s32 offset:8 -; GFX11-NEXT: scratch_load_b32 v174, off, s32 offset:12 -; GFX11-NEXT: scratch_load_b32 v173, off, s32 offset:16 -; GFX11-NEXT: scratch_load_b32 v172, off, s32 offset:20 -; GFX11-NEXT: scratch_load_b32 v171, off, s32 offset:24 -; GFX11-NEXT: scratch_load_b32 v170, off, s32 offset:28 -; GFX11-NEXT: scratch_load_b32 v169, off, s32 offset:32 -; GFX11-NEXT: scratch_load_b32 v168, off, s32 offset:36 -; GFX11-NEXT: scratch_load_b32 v159, off, s32 offset:40 -; GFX11-NEXT: scratch_load_b32 v158, off, s32 offset:44 -; GFX11-NEXT: scratch_load_b32 v157, off, s32 offset:48 -; GFX11-NEXT: scratch_load_b32 v156, off, s32 offset:52 -; GFX11-NEXT: scratch_load_b32 v155, off, s32 offset:56 -; GFX11-NEXT: scratch_load_b32 v154, off, s32 offset:60 -; GFX11-NEXT: scratch_load_b32 v153, off, s32 offset:64 -; GFX11-NEXT: scratch_load_b32 v152, off, s32 offset:68 -; GFX11-NEXT: scratch_load_b32 v143, off, s32 offset:72 -; GFX11-NEXT: scratch_load_b32 v142, off, s32 offset:76 -; GFX11-NEXT: scratch_load_b32 v141, off, s32 offset:80 -; GFX11-NEXT: scratch_load_b32 v140, off, s32 offset:84 -; GFX11-NEXT: scratch_load_b32 v139, off, s32 offset:88 -; GFX11-NEXT: scratch_load_b32 v138, off, s32 offset:92 -; GFX11-NEXT: scratch_load_b32 v137, off, s32 offset:96 -; GFX11-NEXT: scratch_load_b32 v136, off, s32 offset:100 -; GFX11-NEXT: scratch_load_b32 v127, off, s32 offset:104 -; GFX11-NEXT: scratch_load_b32 v126, off, s32 offset:108 -; GFX11-NEXT: scratch_load_b32 v125, off, s32 offset:112 -; GFX11-NEXT: scratch_load_b32 v124, off, s32 offset:116 -; GFX11-NEXT: scratch_load_b32 v123, off, s32 offset:120 -; GFX11-NEXT: scratch_load_b32 v122, off, s32 offset:124 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v121, off, s32 offset:128 -; GFX11-NEXT: scratch_load_b32 v120, off, s32 offset:132 -; GFX11-NEXT: scratch_load_b32 v111, off, s32 offset:136 -; GFX11-NEXT: scratch_load_b32 v110, off, s32 offset:140 -; GFX11-NEXT: scratch_load_b32 v109, off, s32 offset:144 -; GFX11-NEXT: scratch_load_b32 v108, off, s32 offset:148 -; GFX11-NEXT: scratch_load_b32 v107, off, s32 offset:152 -; GFX11-NEXT: scratch_load_b32 v106, off, s32 offset:156 -; GFX11-NEXT: scratch_load_b32 v105, off, s32 offset:160 -; GFX11-NEXT: scratch_load_b32 v104, off, s32 offset:164 -; GFX11-NEXT: scratch_load_b32 v95, off, s32 offset:168 -; GFX11-NEXT: scratch_load_b32 v94, off, s32 offset:172 -; GFX11-NEXT: scratch_load_b32 v93, off, s32 offset:176 -; GFX11-NEXT: scratch_load_b32 v92, off, s32 offset:180 -; GFX11-NEXT: scratch_load_b32 v91, off, s32 offset:184 -; GFX11-NEXT: scratch_load_b32 v90, off, s32 offset:188 -; GFX11-NEXT: scratch_load_b32 v89, off, s32 offset:192 -; GFX11-NEXT: scratch_load_b32 v88, off, s32 offset:196 -; GFX11-NEXT: scratch_load_b32 v79, off, s32 offset:200 -; GFX11-NEXT: scratch_load_b32 v78, off, s32 offset:204 -; GFX11-NEXT: scratch_load_b32 v77, off, s32 offset:208 -; GFX11-NEXT: scratch_load_b32 v76, off, s32 offset:212 -; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:216 -; GFX11-NEXT: scratch_load_b32 v74, off, s32 offset:220 -; GFX11-NEXT: scratch_load_b32 v73, off, s32 offset:224 -; GFX11-NEXT: scratch_load_b32 v72, off, s32 offset:228 -; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:232 -; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:236 -; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:240 -; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:244 -; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:248 -; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:252 -; GFX11-NEXT: s_clause 0x9 ; 40-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:256 -; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:260 -; GFX11-NEXT: scratch_load_b32 v47, off, s32 offset:264 -; GFX11-NEXT: scratch_load_b32 v46, off, s32 offset:268 -; GFX11-NEXT: scratch_load_b32 v45, off, s32 offset:272 -; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:276 -; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:280 -; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:284 -; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:288 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:292 -; GFX11-NEXT: v_dual_mov_b32 v2, v52 :: v_dual_mov_b32 v5, v67 -; GFX11-NEXT: v_dual_mov_b32 v10, v112 :: v_dual_mov_b32 v13, v151 -; GFX11-NEXT: v_dual_mov_b32 v25, v182 :: v_dual_mov_b32 v26, v181 -; GFX11-NEXT: v_dual_mov_b32 v27, v180 :: v_dual_mov_b32 v28, v179 -; GFX11-NEXT: v_dual_mov_b32 v29, v178 :: v_dual_mov_b32 v30, v177 -; GFX11-NEXT: v_mov_b32_e32 v31, v176 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; GFX11-NEXT: .LBB67_4: -; GFX11-NEXT: ; implicit-def: $vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78 -; GFX11-NEXT: ; implicit-def: $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79 -; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-NEXT: ; implicit-def: $vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81 -; GFX11-NEXT: ; implicit-def: $vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84 -; GFX11-NEXT: ; implicit-def: $vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88 -; GFX11-NEXT: ; implicit-def: $vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93 -; GFX11-NEXT: ; implicit-def: $vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99 -; GFX11-NEXT: ; implicit-def: $vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106 -; GFX11-NEXT: ; implicit-def: $vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114 -; GFX11-NEXT: ; implicit-def: $vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123 -; GFX11-NEXT: ; implicit-def: $vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133 -; GFX11-NEXT: ; implicit-def: $vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144 -; GFX11-NEXT: ; implicit-def: $vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156 -; GFX11-NEXT: ; implicit-def: $vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169 +; GFX11-NEXT: ; implicit-def: $vgpr0 +; GFX11-NEXT: ; implicit-def: $vgpr1 +; GFX11-NEXT: ; implicit-def: $vgpr2 +; GFX11-NEXT: ; implicit-def: $vgpr3 +; GFX11-NEXT: ; implicit-def: $vgpr4 +; GFX11-NEXT: ; implicit-def: $vgpr5 +; GFX11-NEXT: ; implicit-def: $vgpr6 +; GFX11-NEXT: ; implicit-def: $vgpr7 +; GFX11-NEXT: ; implicit-def: $vgpr8 +; GFX11-NEXT: ; implicit-def: $vgpr9 +; GFX11-NEXT: ; implicit-def: $vgpr10 +; GFX11-NEXT: ; implicit-def: $vgpr11 +; GFX11-NEXT: ; implicit-def: $vgpr12 +; GFX11-NEXT: ; implicit-def: $vgpr13 +; GFX11-NEXT: ; implicit-def: $vgpr14 +; GFX11-NEXT: ; implicit-def: $vgpr15 ; GFX11-NEXT: s_branch .LBB67_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -111690,252 +109450,80 @@ define inreg <16 x i64> @bitcast_v64i16_to_v16i64_scalar(<64 x i16> inreg %a, i3 ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 -; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 -; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 -; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 -; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 -; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 -; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 -; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 -; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 -; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 -; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 -; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 -; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 -; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 -; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 -; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 -; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 -; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 -; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 -; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 -; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 -; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 -; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 -; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 -; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 -; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 -; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 -; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 -; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 -; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 -; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 -; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 -; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 -; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 -; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 -; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 -; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 -; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 -; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 -; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 -; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 -; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 -; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 -; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 -; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 -; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 -; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 -; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 -; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 -; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 -; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 -; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 -; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 -; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 -; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 -; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 -; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 -; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 -; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 -; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 -; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 -; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 -; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 -; GFX11-NEXT: s_clause 0x9 ; 40-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 -; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 -; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 -; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 -; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 -; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 -; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 -; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 -; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 -; GFX11-NEXT: scratch_store_b32 off, v185, s32 -; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 -; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 -; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 -; GFX11-NEXT: v_dual_mov_b32 v182, v7 :: v_dual_mov_b32 v183, v6 -; GFX11-NEXT: v_dual_mov_b32 v170, v5 :: v_dual_mov_b32 v171, v4 -; GFX11-NEXT: v_dual_mov_b32 v172, v3 :: v_dual_mov_b32 v173, v2 -; GFX11-NEXT: v_dual_mov_b32 v174, v1 :: v_dual_mov_b32 v175, v0 -; GFX11-NEXT: v_dual_mov_b32 v184, s28 :: v_dual_mov_b32 v185, s29 +; GFX11-NEXT: v_dual_mov_b32 v31, v13 :: v_dual_mov_b32 v30, v12 +; GFX11-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 +; GFX11-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29 ; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo ; GFX11-NEXT: s_cbranch_scc0 .LBB71_4 ; GFX11-NEXT: ; %bb.1: ; %cmp.false -; GFX11-NEXT: v_dual_mov_b32 v47, s0 :: v_dual_mov_b32 v52, s2 -; GFX11-NEXT: v_dual_mov_b32 v49, s1 :: v_dual_mov_b32 v56, s3 -; GFX11-NEXT: v_dual_mov_b32 v61, s16 :: v_dual_mov_b32 v74, s18 -; GFX11-NEXT: v_dual_mov_b32 v67, s17 :: v_dual_mov_b32 v82, s19 -; GFX11-NEXT: v_dual_mov_b32 v91, s20 :: v_dual_mov_b32 v112, s22 -; GFX11-NEXT: v_dual_mov_b32 v101, s21 :: v_dual_mov_b32 v124, s23 -; GFX11-NEXT: v_dual_mov_b32 v137, s24 :: v_dual_mov_b32 v14, s26 -; GFX11-NEXT: v_dual_mov_b32 v151, s25 :: v_dual_mov_b32 v30, s27 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17 +; GFX11-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v7, s19 +; GFX11-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 +; GFX11-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23 +; GFX11-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25 +; GFX11-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27 ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 ; GFX11-NEXT: s_cbranch_vccnz .LBB71_3 ; GFX11-NEXT: .LBB71_2: ; %cmp.true -; GFX11-NEXT: v_pk_add_u16 v30, s27, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v15, s27, 3 op_sel_hi:[1,0] ; GFX11-NEXT: v_pk_add_u16 v14, s26, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v176, v176, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v177, v177, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v178, v178, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v179, v179, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v180, v180, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v181, v181, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v182, v182, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v183, v183, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v170, v170, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v171, v171, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v172, v172, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v173, v173, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v174, v174, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v175, v175, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v184, v184, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v151, s25, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v137, s24, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v124, s23, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v112, s22, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v101, s21, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v91, s20, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v82, s19, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v74, s18, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v67, s17, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v61, s16, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v56, s3, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v52, s2, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v49, s1, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v47, s0, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v13, s25, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v12, s24, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v11, s23, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v10, s22, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v9, s21, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v8, s20, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v7, s19, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v6, s18, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v5, s17, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v4, s16, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v31, v31, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v30, v30, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v29, v29, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v28, v28, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v27, v27, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v26, v26, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v25, v25, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v24, v24, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v17, v17, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v16, v16, 3 op_sel_hi:[1,0] ; GFX11-NEXT: .LBB71_3: ; %end -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_dual_mov_b32 v0, v47 :: v_dual_mov_b32 v1, v49 -; GFX11-NEXT: v_dual_mov_b32 v3, v56 :: v_dual_mov_b32 v4, v61 -; GFX11-NEXT: v_dual_mov_b32 v6, v74 :: v_dual_mov_b32 v9, v101 -; GFX11-NEXT: v_dual_mov_b32 v7, v82 :: v_dual_mov_b32 v8, v91 -; GFX11-NEXT: v_dual_mov_b32 v11, v124 :: v_dual_mov_b32 v12, v137 -; GFX11-NEXT: v_dual_mov_b32 v15, v30 :: v_dual_mov_b32 v16, v184 -; GFX11-NEXT: v_dual_mov_b32 v17, v185 :: v_dual_mov_b32 v18, v175 -; GFX11-NEXT: v_dual_mov_b32 v19, v174 :: v_dual_mov_b32 v20, v173 -; GFX11-NEXT: v_dual_mov_b32 v21, v172 :: v_dual_mov_b32 v22, v171 -; GFX11-NEXT: v_dual_mov_b32 v23, v170 :: v_dual_mov_b32 v24, v183 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v185, off, s32 -; GFX11-NEXT: scratch_load_b32 v184, off, s32 offset:4 -; GFX11-NEXT: scratch_load_b32 v175, off, s32 offset:8 -; GFX11-NEXT: scratch_load_b32 v174, off, s32 offset:12 -; GFX11-NEXT: scratch_load_b32 v173, off, s32 offset:16 -; GFX11-NEXT: scratch_load_b32 v172, off, s32 offset:20 -; GFX11-NEXT: scratch_load_b32 v171, off, s32 offset:24 -; GFX11-NEXT: scratch_load_b32 v170, off, s32 offset:28 -; GFX11-NEXT: scratch_load_b32 v169, off, s32 offset:32 -; GFX11-NEXT: scratch_load_b32 v168, off, s32 offset:36 -; GFX11-NEXT: scratch_load_b32 v159, off, s32 offset:40 -; GFX11-NEXT: scratch_load_b32 v158, off, s32 offset:44 -; GFX11-NEXT: scratch_load_b32 v157, off, s32 offset:48 -; GFX11-NEXT: scratch_load_b32 v156, off, s32 offset:52 -; GFX11-NEXT: scratch_load_b32 v155, off, s32 offset:56 -; GFX11-NEXT: scratch_load_b32 v154, off, s32 offset:60 -; GFX11-NEXT: scratch_load_b32 v153, off, s32 offset:64 -; GFX11-NEXT: scratch_load_b32 v152, off, s32 offset:68 -; GFX11-NEXT: scratch_load_b32 v143, off, s32 offset:72 -; GFX11-NEXT: scratch_load_b32 v142, off, s32 offset:76 -; GFX11-NEXT: scratch_load_b32 v141, off, s32 offset:80 -; GFX11-NEXT: scratch_load_b32 v140, off, s32 offset:84 -; GFX11-NEXT: scratch_load_b32 v139, off, s32 offset:88 -; GFX11-NEXT: scratch_load_b32 v138, off, s32 offset:92 -; GFX11-NEXT: scratch_load_b32 v137, off, s32 offset:96 -; GFX11-NEXT: scratch_load_b32 v136, off, s32 offset:100 -; GFX11-NEXT: scratch_load_b32 v127, off, s32 offset:104 -; GFX11-NEXT: scratch_load_b32 v126, off, s32 offset:108 -; GFX11-NEXT: scratch_load_b32 v125, off, s32 offset:112 -; GFX11-NEXT: scratch_load_b32 v124, off, s32 offset:116 -; GFX11-NEXT: scratch_load_b32 v123, off, s32 offset:120 -; GFX11-NEXT: scratch_load_b32 v122, off, s32 offset:124 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v121, off, s32 offset:128 -; GFX11-NEXT: scratch_load_b32 v120, off, s32 offset:132 -; GFX11-NEXT: scratch_load_b32 v111, off, s32 offset:136 -; GFX11-NEXT: scratch_load_b32 v110, off, s32 offset:140 -; GFX11-NEXT: scratch_load_b32 v109, off, s32 offset:144 -; GFX11-NEXT: scratch_load_b32 v108, off, s32 offset:148 -; GFX11-NEXT: scratch_load_b32 v107, off, s32 offset:152 -; GFX11-NEXT: scratch_load_b32 v106, off, s32 offset:156 -; GFX11-NEXT: scratch_load_b32 v105, off, s32 offset:160 -; GFX11-NEXT: scratch_load_b32 v104, off, s32 offset:164 -; GFX11-NEXT: scratch_load_b32 v95, off, s32 offset:168 -; GFX11-NEXT: scratch_load_b32 v94, off, s32 offset:172 -; GFX11-NEXT: scratch_load_b32 v93, off, s32 offset:176 -; GFX11-NEXT: scratch_load_b32 v92, off, s32 offset:180 -; GFX11-NEXT: scratch_load_b32 v91, off, s32 offset:184 -; GFX11-NEXT: scratch_load_b32 v90, off, s32 offset:188 -; GFX11-NEXT: scratch_load_b32 v89, off, s32 offset:192 -; GFX11-NEXT: scratch_load_b32 v88, off, s32 offset:196 -; GFX11-NEXT: scratch_load_b32 v79, off, s32 offset:200 -; GFX11-NEXT: scratch_load_b32 v78, off, s32 offset:204 -; GFX11-NEXT: scratch_load_b32 v77, off, s32 offset:208 -; GFX11-NEXT: scratch_load_b32 v76, off, s32 offset:212 -; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:216 -; GFX11-NEXT: scratch_load_b32 v74, off, s32 offset:220 -; GFX11-NEXT: scratch_load_b32 v73, off, s32 offset:224 -; GFX11-NEXT: scratch_load_b32 v72, off, s32 offset:228 -; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:232 -; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:236 -; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:240 -; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:244 -; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:248 -; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:252 -; GFX11-NEXT: s_clause 0x9 ; 40-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:256 -; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:260 -; GFX11-NEXT: scratch_load_b32 v47, off, s32 offset:264 -; GFX11-NEXT: scratch_load_b32 v46, off, s32 offset:268 -; GFX11-NEXT: scratch_load_b32 v45, off, s32 offset:272 -; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:276 -; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:280 -; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:284 -; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:288 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:292 -; GFX11-NEXT: v_dual_mov_b32 v2, v52 :: v_dual_mov_b32 v5, v67 -; GFX11-NEXT: v_dual_mov_b32 v10, v112 :: v_dual_mov_b32 v13, v151 -; GFX11-NEXT: v_dual_mov_b32 v25, v182 :: v_dual_mov_b32 v26, v181 -; GFX11-NEXT: v_dual_mov_b32 v27, v180 :: v_dual_mov_b32 v28, v179 -; GFX11-NEXT: v_dual_mov_b32 v29, v178 :: v_dual_mov_b32 v30, v177 -; GFX11-NEXT: v_mov_b32_e32 v31, v176 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; GFX11-NEXT: .LBB71_4: -; GFX11-NEXT: ; implicit-def: $vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78 -; GFX11-NEXT: ; implicit-def: $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79 -; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-NEXT: ; implicit-def: $vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81 -; GFX11-NEXT: ; implicit-def: $vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84 -; GFX11-NEXT: ; implicit-def: $vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88 -; GFX11-NEXT: ; implicit-def: $vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93 -; GFX11-NEXT: ; implicit-def: $vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99 -; GFX11-NEXT: ; implicit-def: $vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106 -; GFX11-NEXT: ; implicit-def: $vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114 -; GFX11-NEXT: ; implicit-def: $vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123 -; GFX11-NEXT: ; implicit-def: $vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133 -; GFX11-NEXT: ; implicit-def: $vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144 -; GFX11-NEXT: ; implicit-def: $vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156 -; GFX11-NEXT: ; implicit-def: $vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169 +; GFX11-NEXT: ; implicit-def: $vgpr0 +; GFX11-NEXT: ; implicit-def: $vgpr1 +; GFX11-NEXT: ; implicit-def: $vgpr2 +; GFX11-NEXT: ; implicit-def: $vgpr3 +; GFX11-NEXT: ; implicit-def: $vgpr4 +; GFX11-NEXT: ; implicit-def: $vgpr5 +; GFX11-NEXT: ; implicit-def: $vgpr6 +; GFX11-NEXT: ; implicit-def: $vgpr7 +; GFX11-NEXT: ; implicit-def: $vgpr8 +; GFX11-NEXT: ; implicit-def: $vgpr9 +; GFX11-NEXT: ; implicit-def: $vgpr10 +; GFX11-NEXT: ; implicit-def: $vgpr11 +; GFX11-NEXT: ; implicit-def: $vgpr12 +; GFX11-NEXT: ; implicit-def: $vgpr13 +; GFX11-NEXT: ; implicit-def: $vgpr14 +; GFX11-NEXT: ; implicit-def: $vgpr15 ; GFX11-NEXT: s_branch .LBB71_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -139029,1076 +136617,704 @@ define inreg <16 x double> @bitcast_v64bf16_to_v16f64_scalar(<64 x bfloat> inreg ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:156 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:28 -; GFX11-TRUE16-NEXT: s_clause 0x6 ; 28-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v167, v13 :: v_dual_mov_b32 v176, v12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v177, v11 :: v_dual_mov_b32 v178, v10 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v179, v9 :: v_dual_mov_b32 v180, v8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v181, v7 :: v_dual_mov_b32 v182, v6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v183, v5 :: v_dual_mov_b32 v168, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v169, v3 :: v_dual_mov_b32 v170, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v171, v1 :: v_dual_mov_b32 v172, v0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v174, s28 :: v_dual_mov_b32 v173, s29 -; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 -; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo -; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB79_4 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v135, s0 :: v_dual_mov_b32 v134, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v132, s2 :: v_dual_mov_b32 v129, s3 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v125, s16 :: v_dual_mov_b32 v120, s17 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v114, s18 :: v_dual_mov_b32 v107, s19 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v99, s20 :: v_dual_mov_b32 v90, s21 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v80, s22 :: v_dual_mov_b32 v69, s23 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v57, s24 :: v_dual_mov_b32 v44, s25 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s26 :: v_dual_mov_b32 v15, s27 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 -; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB79_3 -; GFX11-TRUE16-NEXT: .LBB79_2: ; %cmp.true -; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s27, 16 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s27, 0xffff0000 -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s5 -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s26, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s26, 16 -; GFX11-TRUE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: v_bfe_u32 v4, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_bfe_u32 v7, v3, 16, 1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v4, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_and_b32 s5, s25, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s7, s25, 16 -; GFX11-TRUE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x7fff, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s5 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s24, 0xffff0000 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v2, v8 :: v_dual_add_nc_u32 v7, v7, v3 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v3 -; GFX11-TRUE16-NEXT: v_bfe_u32 v8, v5, 16, 1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v4, v9, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s7 -; GFX11-TRUE16-NEXT: v_bfe_u32 v9, v6, 16, 1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v15, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, 0x400000, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x7fff, v7 -; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v4, 16, 1 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s24, 16 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v2, v7, v2 :: v_dual_add_nc_u32 v7, v8, v5 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, v9, v6 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v4 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v7 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x7fff, v8 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v6 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v6, v7, v8, vcc_lo -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc_lo -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v6 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v3 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v1.l -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc_lo -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v44, 16, v2 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v44.h, v4.l -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s23, 0xffff0000 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v57, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v57.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s23, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s22, 0xffff0000 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v69, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v69.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s22, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s21, 0xffff0000 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v80, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v80.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s21, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s20, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v90, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v90.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s20, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s19, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v99, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v99.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s19, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s18, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v107, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v107.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s17, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v114, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v114.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s17, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s16, 0xffff0000 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v120, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v120.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s16, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: s_and_b32 s4, s3, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v125, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v125.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s3 -; GFX11-TRUE16-NEXT: s_and_b32 s3, s2, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v129, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v129.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s3 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s2 -; GFX11-TRUE16-NEXT: s_and_b32 s2, s1, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v132, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v132.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s2 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s1 -; GFX11-TRUE16-NEXT: s_and_b32 s1, s0, 0xffff0000 -; GFX11-TRUE16-NEXT: s_lshl_b32 s0, s0, 16 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v134, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v134.h, v0.l -; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s1 -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s0 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v135, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v135.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v167 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v167 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v167, 16, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v167.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v176 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v176 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v176, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v176.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v177 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, v13 :: v_dual_mov_b32 v30, v12 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29 +; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 +; GFX11-TRUE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo +; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB79_4 +; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v7, s19 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 +; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB79_3 +; GFX11-TRUE16-NEXT: .LBB79_2: ; %cmp.true +; GFX11-TRUE16-NEXT: s_lshl_b32 s5, s27, 16 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s27, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s5 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s26, 0xffff0000 +; GFX11-TRUE16-NEXT: s_lshl_b32 s6, s26, 16 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v1, 16, 1 +; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v0, 16, 1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v0 ; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v177 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_or_b32_e32 v9, 0x400000, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, v5, v1 +; GFX11-TRUE16-NEXT: v_bfe_u32 v8, v3, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s6 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v10, 0x400000, v3 +; GFX11-TRUE16-NEXT: s_and_b32 s5, s25, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v5, 0x7fff, v5 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v0 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s5 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s25, 16 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v177, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v177.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v178 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v178 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v2, v7, vcc_lo ; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v178, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v178.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v179 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v179 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 +; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v4, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v5, v9, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, v8, v3 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v6, 16, 1 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v15, 16, v1 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v15.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v7, 0x7fff, v8 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v5, v6 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s24, 0xffff0000 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, v7, v10, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v179, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v179.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v180 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v180 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v6 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v9, v5, 16, 1 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s24, 16 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v7, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v9, v5 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v7 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v14, 16, v1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v3, v7 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v5 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s23, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v4, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s23, 16 +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v8, 16, 1 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v13, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s22, 0xffff0000 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.h, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v5, v8 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v180, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v180.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v181 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v181 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v9, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v7, v3, 16, 1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v8 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s22, 16 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v9, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v7, v3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v9 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v12, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v5, v9 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.h, v0.l +; GFX11-TRUE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s21, 0xffff0000 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v181, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v181.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v182 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v182 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-TRUE16-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s21, 16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v3 +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v6, 16, 1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v182, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v182.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v183 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v183 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v183, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v183.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v168 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v168 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v8, v5 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v11, 16, v1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v3, v6 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v5 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s20, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v4, 16, 1 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s20, 16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v6 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v8, 16, 1 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v10, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s19, 0xffff0000 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.h, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v5, v8 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v168, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v168.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v169 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v169 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v4 +; GFX11-TRUE16-NEXT: v_bfe_u32 v7, v3, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v8 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v7, v3 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v32 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v32, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s19, 16 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v9, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v5, v32 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.h, v0.l +; GFX11-TRUE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s18, 0xffff0000 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v169, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v169.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v170 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v170 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-TRUE16-NEXT: v_bfe_u32 v8, v5, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s18, 16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v3 +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v6, 16, 1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v8, v5 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v8, 16, v1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v3, v6 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v5 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v4, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s17, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v6 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s17, 16 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v32, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v4, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v32 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v4 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.h, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v5, v32 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v4 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v170, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v170.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v171 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v3, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s16, 0xffff0000 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v171 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v33, v3 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_bfe_u32 v5, v34, 16, 1 +; GFX11-TRUE16-NEXT: s_lshl_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v5, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v3 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 +; GFX11-TRUE16-NEXT: s_and_b32 s4, s3, 0xffff0000 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v171, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v171.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v172 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v172 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_add_f32_e64 v33, 0x40c00000, s4 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v32, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: s_lshl_b32 s3, s3, 16 +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v33, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s3 +; GFX11-TRUE16-NEXT: s_and_b32 s3, s2, 0xffff0000 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, v35, v32 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v35, 0x40c00000, s3 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v3, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v32 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v4, 0x400000, v33 +; GFX11-TRUE16-NEXT: v_bfe_u32 v32, v35, 16, 1 +; GFX11-TRUE16-NEXT: s_lshl_b32 s2, s2, 16 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v33, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v3, v34 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.h, v1.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v32, v35 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s2 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v34 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v172, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v172.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v173 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: s_and_b32 s2, s1, 0xffff0000 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_lshlrev_b32 v1, 16, v173 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_add_f32_e64 v37, 0x40c00000, s2 +; GFX11-TRUE16-NEXT: v_bfe_u32 v36, v32, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v37, 16, 1 +; GFX11-TRUE16-NEXT: s_lshl_b32 s1, s1, 16 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v33, vcc_lo +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v36, v32 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v34, v37 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s1 +; GFX11-TRUE16-NEXT: s_and_b32 s1, s0, 0xffff0000 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v0.l +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v33, 0x400000, v32 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v35, 0x40c00000, s1 ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v173, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v173.h, v0.l -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v174 -; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v33, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v32, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-TRUE16-NEXT: s_lshl_b32 s0, s0, 16 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v2, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, v38, v34 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v0.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, v32, v35 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v33, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v31 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v36 +; GFX11-TRUE16-NEXT: v_add_f32_e64 v36, 0x40c00000, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v1, v33, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v36, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v35, 0x400000, v36 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v0, v0, v37 :: v_dual_add_nc_u32 v33, v33, v36 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v32.l +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v30 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v0 +; GFX11-TRUE16-NEXT: v_bfe_u32 v0, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v30, 16, v30 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v33, v33, v35 :: v_dual_add_nc_u32 v0, v0, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v0 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v31, 16, v31 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v33 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v32.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v34, v36, v37 :: v_dual_add_f32 v31, 0x40c00000, v31 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v30, 0x40c00000, v30 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v33, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v31, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v31, v31 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, v38, v31 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v29 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 16, v29 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v36 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v31 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v29, 0x40c00000, v29 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v31, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v30, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v30 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v34, v30 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_and_b32 v37, 0xffff0000, v28 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v31.h, v33.l +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v30, v30 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v28, 16, v28 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v30, v34, v36 :: v_dual_add_nc_u32 v33, v33, v35 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v29, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v30 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_dual_add_f32 v28, 0x40c00000, v28 :: v_dual_add_nc_u32 v35, v37, v29 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.h, v32.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v33, v33, v36 :: v_dual_add_nc_u32 v32, v38, v34 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v29 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v29, v29 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v27 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v29, v35, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v28, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v28 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v29 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_add_nc_u32 v35, v35, v28 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v26 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v26, 16, v26 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 16, v27 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, v33.l +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v28, v28 +; GFX11-TRUE16-NEXT: v_dual_add_f32 v26, 0x40c00000, v26 :: v_dual_add_f32 v27, 0x40c00000, v27 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v33, v34 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v28, v35, v36 :: v_dual_add_f32 v35, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v27, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v37, v27 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v28 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v32.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v27 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v27, v27 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v25 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v27, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v26, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v26 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v34, v26 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v27 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_and_b32 v37, 0xffff0000, v24 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v24, 16, v24 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 16, v25 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v33.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v26, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_dual_add_f32 v24, 0x40c00000, v24 :: v_dual_add_f32 v25, 0x40c00000, v25 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v26 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v32.l +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v25, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v34 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v23 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v23, 16, v23 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v33, v35 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v37, v25 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v23, 0x40c00000, v23 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v25 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v25, v35, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v24, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v24 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v25 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v33.l +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_and_b32 v37, 0xffff0000, v22 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v22, 16, v22 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v35, v24 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v22, 0x40c00000, v22 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, v1, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 16, v174 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_add_f32_e32 v1, 0x40c00000, v1 -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1 -; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, v2, v1 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 -; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v24, v35, v36 :: v_dual_add_nc_u32 v33, v33, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v23, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v37, v23 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v24 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v32.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v23 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v23, v23 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v21 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v23, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v34, v22, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v22 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v34, v22 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v23 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_and_b32 v37, 0xffff0000, v20 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v22, v22 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v33.l +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v20, 16, v20 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v22, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v33, v35 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v22, 16, v22 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v32.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v20, 0x40c00000, v20 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v34 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v19 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v21, 16, v21 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v19, 16, v19 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_f32_e32 v21, 0x40c00000, v21 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_add_f32_e32 v19, 0x40c00000, v19 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v21, 16, 1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v21 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v21, v21 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v37, v21 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v174, 16, v1 -; GFX11-TRUE16-NEXT: v_mov_b16_e64 v174.h, v0.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v21, v35, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v20, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v38 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v20 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v21, 16, v21 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_add_nc_u32 v35, v35, v20 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v33.l +; GFX11-TRUE16-NEXT: v_bfe_u32 v33, v34, 16, 1 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v18 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v20, v20 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v18, 16, v18 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, v33, v34 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v20, v35, v36 :: v_dual_add_f32 v35, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v19, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v35, 16, 1 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, v37, v19 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v18, 0x40c00000, v18 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v20, 16, v20 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v19 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v32.l +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, v38, v35 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v17 +; GFX11-TRUE16-NEXT: v_bfe_u32 v39, v18, 16, 1 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v19, v34, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v38, 0x400000, v35 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v34, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v39, v18 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v19, 16, v19 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v33.l +; GFX11-TRUE16-NEXT: v_bfe_u32 v36, v34, 16, 1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v32, v32, v38 :: v_dual_add_nc_u32 v33, 0x7fff, v35 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v35, 0x400000, v18 +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v17, 16, v17 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v16 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v18, v18 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, v36, v34 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_dual_add_f32 v17, 0x40c00000, v17 :: v_dual_lshlrev_b32 v16, 16, v16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v18, v33, v35 :: v_dual_add_f32 v33, 0x40c00000, v37 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v36 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v37, v17, 16, 1 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-TRUE16-NEXT: v_bfe_u32 v38, v33, 16, 1 +; GFX11-TRUE16-NEXT: v_add_f32_e32 v16, 0x40c00000, v16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v39, 0x400000, v17 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v18, 16, v18 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v34, v35, v36, vcc_lo +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, v37, v17 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v37, v38, v33 +; GFX11-TRUE16-NEXT: v_bfe_u32 v35, v16, 16, 1 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v38, 0x400000, v33 +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v37, 0x7fff, v37 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, v35, v16 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v48, 0x400000, v16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v32.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v33, v37, v38, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v17, v17 +; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v17, v36, v39, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v16, v16 +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v17, 16, v17 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v16, v35, v48, vcc_lo +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.h, v34.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v16, 16, v16 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.h, v33.l ; GFX11-TRUE16-NEXT: .LBB79_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, v125 :: v_dual_mov_b32 v5, v120 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, v114 :: v_dual_mov_b32 v7, v107 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, v99 :: v_dual_mov_b32 v9, v90 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, v57 :: v_dual_mov_b32 v13, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v30 :: v_dual_mov_b32 v17, v173 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, v174 :: v_dual_mov_b32 v19, v171 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v172 :: v_dual_mov_b32 v21, v169 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, v170 :: v_dual_mov_b32 v23, v183 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v22, v168 :: v_dual_mov_b32 v25, v181 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0x6 ; 28-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:280 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, v135 :: v_dual_mov_b32 v1, v134 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, v132 :: v_dual_mov_b32 v3, v129 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, v80 :: v_dual_mov_b32 v11, v69 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v24, v182 :: v_dual_mov_b32 v27, v179 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v180 :: v_dual_mov_b32 v29, v177 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v178 :: v_dual_mov_b32 v31, v167 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v30, v176 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB79_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 ; GFX11-TRUE16-NEXT: s_branch .LBB79_2 ; ; GFX11-FAKE16-LABEL: bitcast_v64bf16_to_v16f64_scalar: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 -; GFX11-FAKE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v40, s32 offset:288 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v41, s32 offset:284 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v42, s32 offset:280 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v43, s32 offset:276 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v44, s32 offset:272 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v45, s32 offset:268 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v46, s32 offset:264 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v47, s32 offset:260 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v56, s32 offset:256 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v57, s32 offset:252 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v58, s32 offset:248 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v59, s32 offset:244 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v60, s32 offset:240 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v61, s32 offset:236 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v62, s32 offset:232 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v63, s32 offset:228 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v72, s32 offset:224 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v73, s32 offset:220 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v74, s32 offset:216 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v75, s32 offset:212 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v76, s32 offset:208 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v77, s32 offset:204 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v78, s32 offset:200 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v79, s32 offset:196 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v88, s32 offset:192 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v89, s32 offset:188 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v90, s32 offset:184 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v91, s32 offset:180 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v92, s32 offset:176 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v93, s32 offset:172 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v94, s32 offset:168 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v95, s32 offset:164 -; GFX11-FAKE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v104, s32 offset:160 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v105, s32 offset:156 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v106, s32 offset:152 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v107, s32 offset:148 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v108, s32 offset:144 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v109, s32 offset:140 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v110, s32 offset:136 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v111, s32 offset:132 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v120, s32 offset:128 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v121, s32 offset:124 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v122, s32 offset:120 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v123, s32 offset:116 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v124, s32 offset:112 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v125, s32 offset:108 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v126, s32 offset:104 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v127, s32 offset:100 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v136, s32 offset:96 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v137, s32 offset:92 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v138, s32 offset:88 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v139, s32 offset:84 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v140, s32 offset:80 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v141, s32 offset:76 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v142, s32 offset:72 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v143, s32 offset:68 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v152, s32 offset:64 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v153, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v154, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v155, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v156, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v157, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v158, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v159, s32 offset:36 -; GFX11-FAKE16-NEXT: s_clause 0x8 ; 36-byte Folded Spill -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v168, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v169, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v170, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v171, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v172, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v173, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v174, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v175, s32 offset:4 -; GFX11-FAKE16-NEXT: scratch_store_b32 off, v184, s32 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v178, v13 :: v_dual_mov_b32 v179, v12 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v180, v11 :: v_dual_mov_b32 v181, v9 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v182, v10 :: v_dual_mov_b32 v169, v7 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v170, v8 :: v_dual_mov_b32 v177, v3 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v176, v6 :: v_dual_mov_b32 v171, v4 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v174, v5 :: v_dual_mov_b32 v173, v0 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v184, v2 :: v_dual_mov_b32 v175, v1 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v183, s28 :: v_dual_mov_b32 v172, s29 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v31, v13 :: v_dual_mov_b32 v30, v12 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29 ; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 ; GFX11-FAKE16-NEXT: s_and_b32 s5, vcc_lo, exec_lo ; GFX11-FAKE16-NEXT: s_cbranch_scc0 .LBB79_4 ; GFX11-FAKE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v32, s0 :: v_dual_mov_b32 v37, s2 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v34, s1 :: v_dual_mov_b32 v41, s3 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v46, s16 :: v_dual_mov_b32 v59, s18 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v52, s17 :: v_dual_mov_b32 v67, s19 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v76, s20 :: v_dual_mov_b32 v97, s22 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v86, s21 :: v_dual_mov_b32 v109, s23 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v122, s24 :: v_dual_mov_b32 v151, s26 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v136, s25 :: v_dual_mov_b32 v15, s27 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v7, s19 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27 ; GFX11-FAKE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB79_3 ; GFX11-FAKE16-NEXT: .LBB79_2: ; %cmp.true @@ -140106,762 +137322,674 @@ define inreg <16 x double> @bitcast_v64bf16_to_v16f64_scalar(<64 x bfloat> inreg ; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s27, 16 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v1, 0x40c00000, s5 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v0, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s26, 16 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s26, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s6 +; GFX11-FAKE16-NEXT: s_lshl_b32 s6, s26, 16 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 ; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v1, 16, 1 ; GFX11-FAKE16-NEXT: v_bfe_u32 v2, v0, 16, 1 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s6 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, 0x400000, v1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v3, 0x40c00000, s4 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, 0x400000, v0 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v9, v3, 16, 1 ; GFX11-FAKE16-NEXT: v_bfe_u32 v10, v5, 16, 1 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-FAKE16-NEXT: v_bfe_u32 v9, v3, 16, 1 -; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s25, 16 +; GFX11-FAKE16-NEXT: s_and_b32 s5, s25, 0xffff0000 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x7fff, v4 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, v2, v0 -; GFX11-FAKE16-NEXT: s_and_b32 s5, s25, 0xffff0000 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s24, 0xffff0000 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s5 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v51, 0xffff0000, v183 +; GFX11-FAKE16-NEXT: s_lshl_b32 s7, s25, 16 +; GFX11-FAKE16-NEXT: s_and_b32 s4, s24, 0xffff0000 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s24, 16 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v8, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, v9, v3 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, v10, v5 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, v9, v3 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v3 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX11-FAKE16-NEXT: v_bfe_u32 v10, v6, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v4, v7, vcc_lo +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v1, v4, v7 :: v_dual_add_nc_u32 v2, 0x7fff, v2 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v5 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x7fff, v8 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v3 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v2 +; GFX11-FAKE16-NEXT: v_bfe_u32 v5, v6, 16, 1 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v4, v7, v4, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3 -; GFX11-FAKE16-NEXT: v_bfe_u32 v3, v8, 16, 1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v8, 16, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-FAKE16-NEXT: v_lshl_or_b32 v15, v1, 16, v0 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, v3, v8 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v4 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v5, v6 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v7, v8 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s23, 16 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v14, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_bfe_u32 v8, v7, 16, 1 +; GFX11-FAKE16-NEXT: s_and_b32 s4, s23, 0xffff0000 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v8, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s22, 16 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v6 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v8, 16, 1 +; GFX11-FAKE16-NEXT: s_and_b32 s4, s22, 0xffff0000 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v6, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v7, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v6 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v13, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_bfe_u32 v8, v7, 16, 1 +; GFX11-FAKE16-NEXT: s_and_b32 s4, s21, 0xffff0000 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v8, v7 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s21, 16 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v12, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v6 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v8, 16, 1 +; GFX11-FAKE16-NEXT: s_and_b32 s4, s20, 0xffff0000 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s20, 16 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v6, 16, 1 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v10, v6 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v1, 0x7fff, v1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v4, v7, v9, vcc_lo -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v7, v8 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v11, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v6 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v8 ; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s24, 16 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v4 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v6 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v7, 0x400000, v8 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v9, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s23, 0xffff0000 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_bfe_u32 v8, v7, 16, 1 +; GFX11-FAKE16-NEXT: s_and_b32 s4, s19, 0xffff0000 ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo ; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v8, 0x400000, v5 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v10, 0x400000, v9 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v8, v7 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s19, 16 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v6, v1, v7 :: v_dual_and_b32 v1, 0xffff, v2 -; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v9, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v3 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x7fff, v4 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v6 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v7, v9 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s23, 16 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v151, v0, 16, v1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v12, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x7fff, v6 -; GFX11-FAKE16-NEXT: v_bfe_u32 v11, v7, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v5, v4, v8, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s22, 0xffff0000 -; GFX11-FAKE16-NEXT: v_bfe_u32 v9, v12, 16, 1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, v11, v7 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xffff, v3 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc_lo -; GFX11-FAKE16-NEXT: v_add_f32_e64 v10, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s22, 16 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v5 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v11, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v6 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x7fff, v8 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, v9, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v7 -; GFX11-FAKE16-NEXT: v_bfe_u32 v14, v10, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v10, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v8, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v6 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v13, 0x400000, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, 0x7fff, v8 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s21, 0xffff0000 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v7, v6, v9, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v9, v11, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v12, v12 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, v14, v10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff, v5 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v7 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v8, v8, v13 :: v_dual_add_nc_u32 v7, v9, v11 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, 0x7fff, v12 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v12, 0x400000, v10 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v13, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s21, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v7, 0x7fff, v7 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, 0x400000, v11 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v16, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v10, v13, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v8 -; GFX11-FAKE16-NEXT: v_bfe_u32 v12, v16, 16, 1 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s20, 0xffff0000 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v11, v7, v14 :: v_dual_add_nc_u32 v10, v10, v13 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff, v8 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v9 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v14, 0x400000, v13 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v10, 0x7fff, v10 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, v12, v16 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v12, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v13, v13 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s20, 16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v17, 0x400000, v16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, 0x7fff, v11 -; GFX11-FAKE16-NEXT: v_bfe_u32 v18, v12, 16, 1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v19, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v13, v10, v14, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v16, v16 -; GFX11-FAKE16-NEXT: s_and_b32 s4, s19, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, v18, v12 -; GFX11-FAKE16-NEXT: v_bfe_u32 v16, v19, 16, 1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xffff, v9 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v11, v11, v17, vcc_lo -; GFX11-FAKE16-NEXT: v_add_f32_e64 v17, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s19, 16 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v13 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 0x7fff, v14 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, v16, v19 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v16, 0x400000, v12 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v18, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_bfe_u32 v21, v17, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v12, v12 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v11, 16, v11 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v14, 0x7fff, v14 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v20, 0x400000, v19 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v9, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v8, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s18, 0xffff0000 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v13, v13, v16, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v16, v18, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v19, v21, v17 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xffff, v11 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v11, 16, v13 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v14, v14, v20 :: v_dual_add_nc_u32 v13, v16, v18 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v16, 0x7fff, v19 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v19, 0x400000, v17 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v20, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v17, v17 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s18, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v13, 0x7fff, v13 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v21, 0x400000, v18 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v22, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v16, v16, v19, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v17, v20, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v18, v18 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v14 -; GFX11-FAKE16-NEXT: v_bfe_u32 v19, v22, 16, 1 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s18, 16 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v6, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v5, v7, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v7, 0x40c00000, s5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v6 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v9, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v9, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v8 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v32, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_bfe_u32 v8, v7, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s17, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, v17, v20 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v18, v13, v21 :: v_dual_and_b32 v13, 0xffff, v14 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v21, 0x400000, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v17, 0x7fff, v17 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v16, 16, v18 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, v19, v22 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v19, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v20, v20 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s17, 16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v23, 0x400000, v22 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v18, 0x7fff, v18 -; GFX11-FAKE16-NEXT: v_bfe_u32 v24, v19, 16, 1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v25, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v20, v17, v21, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v22, v22 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s17, 16 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v33, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v32, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, v8, v7 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v8, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v6 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v7 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v6, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s5 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s16, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, v24, v19 -; GFX11-FAKE16-NEXT: v_bfe_u32 v22, v25, 16, 1 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v18, v18, v23 :: v_dual_and_b32 v17, 0xffff, v16 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v23, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: s_lshl_b32 s4, s16, 16 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v16, 16, v20 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 0x7fff, v21 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, v22, v25 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v22, 0x400000, v19 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v24, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_bfe_u32 v27, v23, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v18, 16, v18 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v21, 0x7fff, v21 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v26, 0x400000, v25 +; GFX11-FAKE16-NEXT: s_lshl_b32 s5, s16, 16 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v6, 16, 1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v7, v32, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v34, 0x400000, v6 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v6 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v2, v2, v33 :: v_dual_add_nc_u32 v5, v7, v32 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v7, v0, 16, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v5 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v32 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v5, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v33, 0x40c00000, s5 ; GFX11-FAKE16-NEXT: s_and_b32 s4, s3, 0xffff0000 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v20, v20, v22, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v22, v24, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v25, v27, v23 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v18 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v18, 16, v20 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v21, v21, v26 :: v_dual_add_nc_u32 v20, v22, v24 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v22, 0x7fff, v25 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v25, 0x400000, v23 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v26, 0x40c00000, s4 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v23, v23 ; GFX11-FAKE16-NEXT: s_lshl_b32 s3, s3, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v20, 0x7fff, v20 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, 0x400000, v24 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v28, 0x40c00000, s3 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v22, v22, v25, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v23, v26, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v21, 16, v21 -; GFX11-FAKE16-NEXT: v_bfe_u32 v25, v28, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v5, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6 +; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v33, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v6, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v5 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v5 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v34, vcc_lo +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s3 ; GFX11-FAKE16-NEXT: s_and_b32 s3, s2, 0xffff0000 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v24, v20, v27 :: v_dual_add_nc_u32 v23, v23, v26 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_and_b32_e32 v20, 0xffff, v21 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v21, 16, v22 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v27, 0x400000, v26 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v23, 0x7fff, v23 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v22, 16, v24 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v24, v25, v28 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v25, 0x40c00000, s3 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v32, v33 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v34, 16, 1 ; GFX11-FAKE16-NEXT: s_lshl_b32 s2, s2, 16 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v29, 0x400000, v28 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v24, 0x7fff, v24 -; GFX11-FAKE16-NEXT: v_bfe_u32 v30, v25, 16, 1 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v31, 0x40c00000, s2 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v26, v23, v27, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v28, v28 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s4 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v33, v34 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v32, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v5, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v32 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, v4, v32 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v35, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e64 v35, 0x40c00000, s2 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v4 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v4, 0x400000, v34 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v33, 0x40c00000, s3 +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 ; GFX11-FAKE16-NEXT: s_and_b32 s2, s1, 0xffff0000 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, v30, v25 -; GFX11-FAKE16-NEXT: v_bfe_u32 v28, v31, 16, 1 ; GFX11-FAKE16-NEXT: s_lshl_b32 s1, s1, 16 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v24, v24, v29, vcc_lo -; GFX11-FAKE16-NEXT: v_add_f32_e64 v29, 0x40c00000, s2 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v23, 0xffff, v22 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v22, 16, v26 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v26, 0x7fff, v27 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, v28, v31 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v28, 0x400000, v25 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v30, 0x40c00000, s1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v29, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v24, 16, v24 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v27, 0x7fff, v27 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v32, 0x400000, v31 -; GFX11-FAKE16-NEXT: s_and_b32 s1, s0, 0xffff0000 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v26, v26, v28, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v28, v30, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v31, v31 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v33, v29 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v25, 0xffff, v24 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v24, 16, v26 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v27, v27, v32 :: v_dual_add_nc_u32 v26, v28, v30 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v4, v33, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v35 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v33 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, 0x7fff, v31 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v31, 0x400000, v29 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v32, 0x40c00000, s1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v29, v29 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v4, v33 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v36, vcc_lo +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v4, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v36, 0x40c00000, s1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v3, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v32, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s2 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v36, 16, 1 +; GFX11-FAKE16-NEXT: s_and_b32 s1, s0, 0xffff0000 ; GFX11-FAKE16-NEXT: s_lshl_b32 s0, s0, 16 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v26, 0x7fff, v26 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v33, 0x400000, v30 -; GFX11-FAKE16-NEXT: v_add_f32_e64 v34, 0x40c00000, s0 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v28, v28, v31, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v29, v32, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v30, v30 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v31, v34, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v34 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v27, 16, v27 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v30, v26, v33, vcc_lo -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v26, 16, v28 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, v29, v32 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v33, 16, v178 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v30 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v30, v31, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff0000, v178 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v28, 0x7fff, v28 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v33, 0x40c00000, v33 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v109, v5, 16, v7 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v30, 0x7fff, v30 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v31, 0x40c00000, v31 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v28, v35, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v33, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v32, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v34, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v36 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v38, 0x40c00000, s0 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v3 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v32, v34 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v3, v0, 16, v1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v33 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v33, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v2, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v35 +; GFX11-FAKE16-NEXT: v_add_f32_e64 v35, 0x40c00000, s1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v34 +; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v38, 16, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v32, v33, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v28, 0xffff, v29 +; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v2, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v2, v0, 16, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, v33, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v36, v38 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v38 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v34 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff0000, v31 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v31, 16, v31 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v32 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_dual_add_f32 v34, 0x40c00000, v34 :: v_dual_add_f32 v31, 0x40c00000, v31 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v36, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v34, 16, 1 ; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v31, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v37, v33 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v30, v30, v36, vcc_lo -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v36, 16, v179 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v35, v31 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v179 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v31 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v0, 0x7fff, v0 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v1, v1, 16, v32 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v36, v34 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v31 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v30 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v30, 16, v30 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v0, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v31 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 ; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v32, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e32 v30, 0x40c00000, v30 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v31, v31 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v180 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v31, v34, v38, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v36, 16, 1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v38, 16, v180 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_add_f32 v37, 0x40c00000, v37 :: v_dual_add_nc_u32 v34, v34, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v33, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v38 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v33 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v34 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v31, v35, v37 :: v_dual_add_nc_u32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v30, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, v33 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v35, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v29 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v30 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v32, v32, v38 :: v_dual_lshlrev_b32 v29, 16, v29 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v30 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_f32_e32 v29, 0x40c00000, v29 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v30, v30 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v37, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v30, v34, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v29, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v31, v32, 16, v31 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v30, 16, v30 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v178, v31, 16, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v36, v37 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v33, v33, v48 :: v_dual_lshlrev_b32 v36, 16, v182 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v33, v33, v38 :: v_dual_add_nc_u32 v32, v34, v35 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v35, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v182 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v34 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x7fff, v31 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v179, v32, 16, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v30, 0xffff, v30 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v136, v2, 16, v4 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v36, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v36, v29 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v28 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v28 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v29 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v29, v29 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v31, v31, v48 :: v_dual_add_nc_u32 v38, v38, v35 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v37, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v181 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v38 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v38, 16, v181 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v37, 0x40c00000, v37 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v38 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v32, v39, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e32 v28, 0x40c00000, v28 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v30, 0xffff, v30 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v29, v34, v37, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v33 -; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v37, 16, 1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v48, vcc_lo -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v180, v31, 16, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v35, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v170 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v36, v38 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_add_f32 v35, 0x40c00000, v35 :: v_dual_lshlrev_b32 v36, 16, v170 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v28, 16, 1 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v30, v33, 16, v30 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v28 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v32, v32, v38 :: v_dual_add_nc_u32 v33, v35, v28 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v27 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v27 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v36, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v182, v31, 16, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, v38, v35 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v39, v36 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v48, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v28, v28 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v29 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v27, 0x40c00000, v27 :: v_dual_cndmask_b32 v28, v33, v37 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x7fff, v31 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v29, 0xffff, v29 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v27, 16, 1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v28, 16, v28 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v34, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v29, v32, 16, v29 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v27 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v27, v27 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v169 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v31, v31, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v34, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v36, v27 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v26 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v26 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v28, 0xffff, v28 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v26, 0x40c00000, v26 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v28, v32, 16, v28 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v27, v34, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v26, 16, 1 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v39, 16, v169 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v181, v32, 16, v33 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v37, 0x40c00000, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v176 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v48, vcc_lo -; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v39 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 -; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v37, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v34 -; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v36, 16, 1 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v38 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v35, v37 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v35, 16, v176 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v32, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v25 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v27, 16, v27 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v36 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v37, v26 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v26 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_bfe_u32 v49, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v25 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v26, v32, v37, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v27, 0xffff, v27 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v170, v33, 16, v31 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v49, v35 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v32, v32, v48 :: v_dual_add_nc_u32 v33, v37, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v174 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v25, 0x40c00000, v25 :: v_dual_lshlrev_b32 v36, 16, v24 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff0000, v24 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v34, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v27, v33, 16, v27 +; GFX11-FAKE16-NEXT: v_bfe_u32 v33, v25, 16, 1 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v35 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v33, v25 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v26, 16, v26 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v24, 0x40c00000, v24 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v25 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v36, 16, 1 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v26, 0xffff, v26 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v24, 16, 1 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v23 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v23 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v25, v33, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v39, v36 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v26, v32, 16, v26 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v34 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v35, v24 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v35, 0x40c00000, v37 :: v_dual_cndmask_b32 v34, v34, v36 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v36, 16, v174 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff, v32 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v36, 0x40c00000, v36 :: v_dual_cndmask_b32 v33, v33, v39 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v169, v31, 16, v32 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, v37, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v34 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v31, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v25, 16, v25 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v24 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v23, 0x40c00000, v23 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v35, vcc_lo +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v37 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v25, 0xffff, v25 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v22 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff0000, v171 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v22 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v24, v34, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v25, v32, 16, v25 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff, v33 +; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v23, 16, 1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v24, 16, v24 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, v34, v35 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v36 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v38, 16, v177 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v31, v36 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v176, v33, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v37 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v32, v23 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v24, v24, 16, v33 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v34 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v34, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v32, 0x40c00000, v32 :: v_dual_lshlrev_b32 v37, 16, v171 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x7fff, v31 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v38 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v22, 0x40c00000, v22 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v36 ; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v33, v34, vcc_lo -; GFX11-FAKE16-NEXT: v_or_b32_e32 v34, 0x400000, v36 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v37 -; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v32, 16, 1 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v50, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v31, v31, v34 :: v_dual_add_nc_u32 v36, v37, v32 -; GFX11-FAKE16-NEXT: v_bfe_u32 v34, v35, 16, 1 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v177 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v37, 0x40c00000, v37 :: v_dual_add_nc_u32 v34, v34, v35 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v34, 0x400000, v23 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v23, v23 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v22 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v36, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v23, v32, v34 :: v_dual_add_nc_u32 v34, v35, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v21 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 16, v21 +; GFX11-FAKE16-NEXT: v_bfe_u32 v32, v22, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v34, 0x7fff, v34 -; GFX11-FAKE16-NEXT: v_bfe_u32 v49, v37, 16, 1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v50, v38 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v50, 16, v184 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v35, 0x40c00000, v35 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_dual_add_f32 v21, 0x40c00000, v21 :: v_dual_add_nc_u32 v32, v32, v22 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v23, 16, v23 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v37, vcc_lo ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v34, v34, v48 :: v_dual_add_nc_u32 v35, v49, v37 +; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v48, v21, 16, 1 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v22, v22 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v21 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v23, 0xffff, v23 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v48, v21 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v22, v32, v38, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, v39, v35 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v20 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v48, 0xffff0000, v184 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v20, 16, v20 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v21, v21 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v22, 16, v22 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v32 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v35 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v20, 0x40c00000, v20 :: v_dual_cndmask_b32 v21, v36, v37 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v36, 0x40c00000, v38 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v22, 0xffff, v22 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v49, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v36, v36, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v37, 0x40c00000, v50 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 +; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v20, 16, 1 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v21, 16, v21 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v32, v39, vcc_lo +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v23, v33, 16, v23 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v22, v34, 16, v22 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v21, 0xffff, v21 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v36 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v38, 0x40c00000, v48 :: v_dual_cndmask_b32 v35, v35, v49 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v36, 16, v36 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v34 -; GFX11-FAKE16-NEXT: v_bfe_u32 v48, v37, 16, 1 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v35, 16, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff, v36 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v174, v33, 16, v31 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v171, v32, 16, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, v48, v37 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v33, 0xffff0000, v175 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v34, 16, v175 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v39, v39, v38 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v177, v35, 16, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v31, 0x7fff, v31 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v37 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v33, 0x40c00000, v33 :: v_dual_add_f32 v34, 0x40c00000, v34 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v39 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v36, 0x400000, v38 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, v37, v20 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff0000, v19 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v19, 16, v19 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v20, v20 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v21, v32, 16, v21 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v32, 0x7fff, v35 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v33, 0x7fff, v33 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v35, 0x400000, v20 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v19, 0x40c00000, v19 :: v_dual_add_f32 v34, 0x40c00000, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v36 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v20, v33, v35 :: v_dual_and_b32 v33, 0xffff0000, v18 +; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v19, 16, 1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v34, 16, 1 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v33, 0x40c00000, v33 :: v_dual_lshlrev_b32 v18, 16, v18 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v38, v19 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v32, v32, v37 :: v_dual_add_nc_u32 v35, v35, v34 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v19 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-FAKE16-NEXT: v_bfe_u32 v37, v33, 16, 1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v34, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v31, v31, v35, vcc_lo -; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v173 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v48, 16, v173 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v49, 0x400000, v33 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v31, 16, v31 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v35, 0x40c00000, v35 :: v_dual_cndmask_b32 v32, v32, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v37, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, v39, v34 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 ; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v34 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v35, 16, 1 +; GFX11-FAKE16-NEXT: v_dual_add_f32 v18, 0x40c00000, v18 :: v_dual_add_nc_u32 v37, v37, v33 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v19, v36, v38, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, 0x7fff, v37 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v38, v38, v35 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v122, v3, 16, v6 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v37, v39, vcc_lo +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v20, 16, v20 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v37 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v37, 0x400000, v33 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v38, 0x400000, v18 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v35, v39, vcc_lo +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v18, 16, 1 ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v37, 0x7fff, v38 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v38, 0x40c00000, v48 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v39, 0x400000, v35 -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v33, v36, v49 :: v_dual_lshlrev_b32 v48, 16, v183 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_bfe_u32 v36, v38, 16, 1 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v49, 0x400000, v38 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v48, 0x40c00000, v48 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v35, v37, v39, vcc_lo -; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v172 -; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v39, 16, v172 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, v36, v38 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v55, 0x400000, v48 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v19, 16, v19 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v20, 0xffff, v20 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v18 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v36, v37, vcc_lo +; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff0000, v17 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 16, v17 +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v37, 16, v16 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v18, v18 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_dual_add_f32 v36, 0x40c00000, v36 :: v_dual_add_f32 v17, 0x40c00000, v17 ; GFX11-FAKE16-NEXT: v_add_f32_e32 v37, 0x40c00000, v37 -; GFX11-FAKE16-NEXT: v_add_f32_e32 v39, 0x40c00000, v39 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v36, 0x7fff, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff, v34 -; GFX11-FAKE16-NEXT: v_bfe_u32 v50, v37, 16, 1 -; GFX11-FAKE16-NEXT: v_bfe_u32 v38, v39, 16, 1 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v36, v36, v49, vcc_lo -; GFX11-FAKE16-NEXT: v_or_b32_e32 v54, 0x400000, v39 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v39, v39 -; GFX11-FAKE16-NEXT: v_dual_add_f32 v50, 0x40c00000, v51 :: v_dual_add_nc_u32 v49, v50, v37 -; GFX11-FAKE16-NEXT: v_bfe_u32 v51, v48, 16, 1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v38, v38, v39 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v53, 0x400000, v37 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xffff0000, v16 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v18, v35, v38, vcc_lo ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v49, 0x7fff, v49 -; GFX11-FAKE16-NEXT: v_bfe_u32 v52, v50, 16, 1 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v51, v51, v48 +; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v36, 16, 1 +; GFX11-FAKE16-NEXT: v_bfe_u32 v35, v17, 16, 1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v50, 0x400000, v17 +; GFX11-FAKE16-NEXT: v_add_f32_e32 v16, 0x40c00000, v16 +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v17, v17 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v38, v39, v36 +; GFX11-FAKE16-NEXT: v_bfe_u32 v39, v37, 16, 1 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, v35, v17 +; GFX11-FAKE16-NEXT: v_bfe_u32 v48, v16, 16, 1 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v51, 0x400000, v37 ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v38, 0x7fff, v38 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v36, 16, v36 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v35, 16, v35 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v52, v52, v50 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v38, v38, v54 :: v_dual_add_nc_u32 v51, 0x7fff, v51 -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v48, v48 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v36, 0xffff, v36 -; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v39, 0x7fff, v52 -; GFX11-FAKE16-NEXT: v_or_b32_e32 v52, 0x400000, v50 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v38, 16, v38 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v48, v51, v55, vcc_lo +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v39, v39, v37 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v35 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v49, 0x400000, v36 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v48, v48, v16 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v18, 16, v18 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v39, 0x7fff, v39 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v17, v35, v50, vcc_lo ; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v184, v32, 16, v31 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v175, v33, 16, v34 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v38, 0xffff, v38 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v48, 16, v48 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v37, v49, v53, vcc_lo -; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v50, v50 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v173, v35, 16, v36 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v97, v8, 16, v10 -; GFX11-FAKE16-NEXT: v_and_b32_e32 v48, 0xffff, v48 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v37, 16, v37 -; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v39, v39, v52, vcc_lo -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v86, v9, 16, v12 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v76, v11, 16, v13 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v67, v14, 16, v17 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v172, v37, 16, v38 -; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v39, 16, v39 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v59, v16, 16, v19 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v52, v18, 16, v20 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v46, v21, 16, v23 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v41, v22, 16, v25 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v183, v39, 16, v48 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v37, v24, 16, v27 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v34, v26, 16, v28 -; GFX11-FAKE16-NEXT: v_lshl_or_b32 v32, v29, 16, v30 +; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v35, 0x7fff, v48 +; GFX11-FAKE16-NEXT: v_or_b32_e32 v48, 0x400000, v16 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v17, 16, v17 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v37, v39, v51, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff, v19 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v18, 0xffff, v18 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xffff, v17 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v36, v38, v49, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v16, v16 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v20, v32, 16, v20 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v19, v34, 16, v19 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v18, v33, 16, v18 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v36, 16, v36 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v16, v35, v48, vcc_lo +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v35, 16, v37 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v17, v36, 16, v17 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v16, 16, v16 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v35, 0xffff, v35 +; GFX11-FAKE16-NEXT: v_lshl_or_b32 v16, v16, 16, v35 ; GFX11-FAKE16-NEXT: .LBB79_3: ; %end -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v3, v41 :: v_dual_mov_b32 v4, v46 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v6, v59 :: v_dual_mov_b32 v9, v86 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v7, v67 :: v_dual_mov_b32 v8, v76 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v10, v97 :: v_dual_mov_b32 v13, v136 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v11, v109 :: v_dual_mov_b32 v12, v122 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v14, v151 :: v_dual_mov_b32 v17, v172 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v18, v173 :: v_dual_mov_b32 v19, v175 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v20, v184 :: v_dual_mov_b32 v23, v174 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v22, v171 :: v_dual_mov_b32 v25, v169 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v26, v170 :: v_dual_mov_b32 v29, v180 -; GFX11-FAKE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-FAKE16-NEXT: scratch_load_b32 v184, off, s32 -; GFX11-FAKE16-NEXT: scratch_load_b32 v175, off, s32 offset:4 -; GFX11-FAKE16-NEXT: scratch_load_b32 v174, off, s32 offset:8 -; GFX11-FAKE16-NEXT: scratch_load_b32 v173, off, s32 offset:12 -; GFX11-FAKE16-NEXT: scratch_load_b32 v172, off, s32 offset:16 -; GFX11-FAKE16-NEXT: scratch_load_b32 v171, off, s32 offset:20 -; GFX11-FAKE16-NEXT: scratch_load_b32 v170, off, s32 offset:24 -; GFX11-FAKE16-NEXT: scratch_load_b32 v169, off, s32 offset:28 -; GFX11-FAKE16-NEXT: scratch_load_b32 v168, off, s32 offset:32 -; GFX11-FAKE16-NEXT: scratch_load_b32 v159, off, s32 offset:36 -; GFX11-FAKE16-NEXT: scratch_load_b32 v158, off, s32 offset:40 -; GFX11-FAKE16-NEXT: scratch_load_b32 v157, off, s32 offset:44 -; GFX11-FAKE16-NEXT: scratch_load_b32 v156, off, s32 offset:48 -; GFX11-FAKE16-NEXT: scratch_load_b32 v155, off, s32 offset:52 -; GFX11-FAKE16-NEXT: scratch_load_b32 v154, off, s32 offset:56 -; GFX11-FAKE16-NEXT: scratch_load_b32 v153, off, s32 offset:60 -; GFX11-FAKE16-NEXT: scratch_load_b32 v152, off, s32 offset:64 -; GFX11-FAKE16-NEXT: scratch_load_b32 v143, off, s32 offset:68 -; GFX11-FAKE16-NEXT: scratch_load_b32 v142, off, s32 offset:72 -; GFX11-FAKE16-NEXT: scratch_load_b32 v141, off, s32 offset:76 -; GFX11-FAKE16-NEXT: scratch_load_b32 v140, off, s32 offset:80 -; GFX11-FAKE16-NEXT: scratch_load_b32 v139, off, s32 offset:84 -; GFX11-FAKE16-NEXT: scratch_load_b32 v138, off, s32 offset:88 -; GFX11-FAKE16-NEXT: scratch_load_b32 v137, off, s32 offset:92 -; GFX11-FAKE16-NEXT: scratch_load_b32 v136, off, s32 offset:96 -; GFX11-FAKE16-NEXT: scratch_load_b32 v127, off, s32 offset:100 -; GFX11-FAKE16-NEXT: scratch_load_b32 v126, off, s32 offset:104 -; GFX11-FAKE16-NEXT: scratch_load_b32 v125, off, s32 offset:108 -; GFX11-FAKE16-NEXT: scratch_load_b32 v124, off, s32 offset:112 -; GFX11-FAKE16-NEXT: scratch_load_b32 v123, off, s32 offset:116 -; GFX11-FAKE16-NEXT: scratch_load_b32 v122, off, s32 offset:120 -; GFX11-FAKE16-NEXT: scratch_load_b32 v121, off, s32 offset:124 -; GFX11-FAKE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-FAKE16-NEXT: scratch_load_b32 v120, off, s32 offset:128 -; GFX11-FAKE16-NEXT: scratch_load_b32 v111, off, s32 offset:132 -; GFX11-FAKE16-NEXT: scratch_load_b32 v110, off, s32 offset:136 -; GFX11-FAKE16-NEXT: scratch_load_b32 v109, off, s32 offset:140 -; GFX11-FAKE16-NEXT: scratch_load_b32 v108, off, s32 offset:144 -; GFX11-FAKE16-NEXT: scratch_load_b32 v107, off, s32 offset:148 -; GFX11-FAKE16-NEXT: scratch_load_b32 v106, off, s32 offset:152 -; GFX11-FAKE16-NEXT: scratch_load_b32 v105, off, s32 offset:156 -; GFX11-FAKE16-NEXT: scratch_load_b32 v104, off, s32 offset:160 -; GFX11-FAKE16-NEXT: scratch_load_b32 v95, off, s32 offset:164 -; GFX11-FAKE16-NEXT: scratch_load_b32 v94, off, s32 offset:168 -; GFX11-FAKE16-NEXT: scratch_load_b32 v93, off, s32 offset:172 -; GFX11-FAKE16-NEXT: scratch_load_b32 v92, off, s32 offset:176 -; GFX11-FAKE16-NEXT: scratch_load_b32 v91, off, s32 offset:180 -; GFX11-FAKE16-NEXT: scratch_load_b32 v90, off, s32 offset:184 -; GFX11-FAKE16-NEXT: scratch_load_b32 v89, off, s32 offset:188 -; GFX11-FAKE16-NEXT: scratch_load_b32 v88, off, s32 offset:192 -; GFX11-FAKE16-NEXT: scratch_load_b32 v79, off, s32 offset:196 -; GFX11-FAKE16-NEXT: scratch_load_b32 v78, off, s32 offset:200 -; GFX11-FAKE16-NEXT: scratch_load_b32 v77, off, s32 offset:204 -; GFX11-FAKE16-NEXT: scratch_load_b32 v76, off, s32 offset:208 -; GFX11-FAKE16-NEXT: scratch_load_b32 v75, off, s32 offset:212 -; GFX11-FAKE16-NEXT: scratch_load_b32 v74, off, s32 offset:216 -; GFX11-FAKE16-NEXT: scratch_load_b32 v73, off, s32 offset:220 -; GFX11-FAKE16-NEXT: scratch_load_b32 v72, off, s32 offset:224 -; GFX11-FAKE16-NEXT: scratch_load_b32 v63, off, s32 offset:228 -; GFX11-FAKE16-NEXT: scratch_load_b32 v62, off, s32 offset:232 -; GFX11-FAKE16-NEXT: scratch_load_b32 v61, off, s32 offset:236 -; GFX11-FAKE16-NEXT: scratch_load_b32 v60, off, s32 offset:240 -; GFX11-FAKE16-NEXT: scratch_load_b32 v59, off, s32 offset:244 -; GFX11-FAKE16-NEXT: scratch_load_b32 v58, off, s32 offset:248 -; GFX11-FAKE16-NEXT: scratch_load_b32 v57, off, s32 offset:252 -; GFX11-FAKE16-NEXT: s_clause 0x8 ; 36-byte Folded Reload -; GFX11-FAKE16-NEXT: scratch_load_b32 v56, off, s32 offset:256 -; GFX11-FAKE16-NEXT: scratch_load_b32 v47, off, s32 offset:260 -; GFX11-FAKE16-NEXT: scratch_load_b32 v46, off, s32 offset:264 -; GFX11-FAKE16-NEXT: scratch_load_b32 v45, off, s32 offset:268 -; GFX11-FAKE16-NEXT: scratch_load_b32 v44, off, s32 offset:272 -; GFX11-FAKE16-NEXT: scratch_load_b32 v43, off, s32 offset:276 -; GFX11-FAKE16-NEXT: scratch_load_b32 v42, off, s32 offset:280 -; GFX11-FAKE16-NEXT: scratch_load_b32 v41, off, s32 offset:284 -; GFX11-FAKE16-NEXT: scratch_load_b32 v40, off, s32 offset:288 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, v32 :: v_dual_mov_b32 v1, v34 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, v37 :: v_dual_mov_b32 v5, v52 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v16, v183 :: v_dual_mov_b32 v21, v177 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v24, v176 :: v_dual_mov_b32 v27, v181 -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v28, v182 -; GFX11-FAKE16-NEXT: v_dual_mov_b32 v30, v179 :: v_dual_mov_b32 v31, v178 -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-FAKE16-NEXT: .LBB79_4: -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154 -; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr15 ; GFX11-FAKE16-NEXT: s_branch .LBB79_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -144471,252 +141599,80 @@ define inreg <16 x double> @bitcast_v64f16_to_v16f64_scalar(<64 x half> inreg %a ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 -; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 -; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 -; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 -; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 -; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 -; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 -; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 -; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 -; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 -; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 -; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 -; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 -; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 -; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 -; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 -; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 -; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 -; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 -; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 -; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 -; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 -; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 -; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 -; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 -; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 -; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 -; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 -; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 -; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 -; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 -; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 -; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 -; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 -; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 -; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 -; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 -; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 -; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 -; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 -; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 -; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 -; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 -; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 -; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 -; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 -; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 -; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 -; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 -; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 -; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 -; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 -; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 -; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 -; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 -; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 -; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 -; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 -; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 -; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 -; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 -; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 -; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 -; GFX11-NEXT: s_clause 0x9 ; 40-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 -; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 -; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 -; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 -; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 -; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 -; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 -; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 -; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 -; GFX11-NEXT: scratch_store_b32 off, v185, s32 -; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 -; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 -; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 -; GFX11-NEXT: v_dual_mov_b32 v182, v7 :: v_dual_mov_b32 v183, v6 -; GFX11-NEXT: v_dual_mov_b32 v170, v5 :: v_dual_mov_b32 v171, v4 -; GFX11-NEXT: v_dual_mov_b32 v172, v3 :: v_dual_mov_b32 v173, v2 -; GFX11-NEXT: v_dual_mov_b32 v174, v1 :: v_dual_mov_b32 v175, v0 -; GFX11-NEXT: v_dual_mov_b32 v184, s28 :: v_dual_mov_b32 v185, s29 +; GFX11-NEXT: v_dual_mov_b32 v31, v13 :: v_dual_mov_b32 v30, v12 +; GFX11-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 +; GFX11-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29 ; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo ; GFX11-NEXT: s_cbranch_scc0 .LBB83_4 ; GFX11-NEXT: ; %bb.1: ; %cmp.false -; GFX11-NEXT: v_dual_mov_b32 v47, s0 :: v_dual_mov_b32 v52, s2 -; GFX11-NEXT: v_dual_mov_b32 v49, s1 :: v_dual_mov_b32 v56, s3 -; GFX11-NEXT: v_dual_mov_b32 v61, s16 :: v_dual_mov_b32 v74, s18 -; GFX11-NEXT: v_dual_mov_b32 v67, s17 :: v_dual_mov_b32 v82, s19 -; GFX11-NEXT: v_dual_mov_b32 v91, s20 :: v_dual_mov_b32 v112, s22 -; GFX11-NEXT: v_dual_mov_b32 v101, s21 :: v_dual_mov_b32 v124, s23 -; GFX11-NEXT: v_dual_mov_b32 v137, s24 :: v_dual_mov_b32 v14, s26 -; GFX11-NEXT: v_dual_mov_b32 v151, s25 :: v_dual_mov_b32 v30, s27 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17 +; GFX11-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v7, s19 +; GFX11-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 +; GFX11-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23 +; GFX11-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25 +; GFX11-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27 ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 ; GFX11-NEXT: s_cbranch_vccnz .LBB83_3 ; GFX11-NEXT: .LBB83_2: ; %cmp.true -; GFX11-NEXT: v_pk_add_f16 v30, 0x200, s27 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v15, 0x200, s27 op_sel_hi:[0,1] ; GFX11-NEXT: v_pk_add_f16 v14, 0x200, s26 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v176, 0x200, v176 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v177, 0x200, v177 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v178, 0x200, v178 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v179, 0x200, v179 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v180, 0x200, v180 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v181, 0x200, v181 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v182, 0x200, v182 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v183, 0x200, v183 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v170, 0x200, v170 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v171, 0x200, v171 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v172, 0x200, v172 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v173, 0x200, v173 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v174, 0x200, v174 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v175, 0x200, v175 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v184, 0x200, v184 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v151, 0x200, s25 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v137, 0x200, s24 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v124, 0x200, s23 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v112, 0x200, s22 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v101, 0x200, s21 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v91, 0x200, s20 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v82, 0x200, s19 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v74, 0x200, s18 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v67, 0x200, s17 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v61, 0x200, s16 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v56, 0x200, s3 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v52, 0x200, s2 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v49, 0x200, s1 op_sel_hi:[0,1] -; GFX11-NEXT: v_pk_add_f16 v47, 0x200, s0 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v13, 0x200, s25 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v12, 0x200, s24 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v11, 0x200, s23 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v10, 0x200, s22 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v9, 0x200, s21 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v8, 0x200, s20 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v7, 0x200, s19 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v6, 0x200, s18 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v5, 0x200, s17 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v4, 0x200, s16 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v31, 0x200, v31 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v30, 0x200, v30 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v29, 0x200, v29 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v28, 0x200, v28 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v27, 0x200, v27 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v26, 0x200, v26 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v25, 0x200, v25 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v24, 0x200, v24 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v17, 0x200, v17 op_sel_hi:[0,1] +; GFX11-NEXT: v_pk_add_f16 v16, 0x200, v16 op_sel_hi:[0,1] ; GFX11-NEXT: .LBB83_3: ; %end -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_dual_mov_b32 v0, v47 :: v_dual_mov_b32 v1, v49 -; GFX11-NEXT: v_dual_mov_b32 v3, v56 :: v_dual_mov_b32 v4, v61 -; GFX11-NEXT: v_dual_mov_b32 v6, v74 :: v_dual_mov_b32 v9, v101 -; GFX11-NEXT: v_dual_mov_b32 v7, v82 :: v_dual_mov_b32 v8, v91 -; GFX11-NEXT: v_dual_mov_b32 v11, v124 :: v_dual_mov_b32 v12, v137 -; GFX11-NEXT: v_dual_mov_b32 v15, v30 :: v_dual_mov_b32 v16, v184 -; GFX11-NEXT: v_dual_mov_b32 v17, v185 :: v_dual_mov_b32 v18, v175 -; GFX11-NEXT: v_dual_mov_b32 v19, v174 :: v_dual_mov_b32 v20, v173 -; GFX11-NEXT: v_dual_mov_b32 v21, v172 :: v_dual_mov_b32 v22, v171 -; GFX11-NEXT: v_dual_mov_b32 v23, v170 :: v_dual_mov_b32 v24, v183 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v185, off, s32 -; GFX11-NEXT: scratch_load_b32 v184, off, s32 offset:4 -; GFX11-NEXT: scratch_load_b32 v175, off, s32 offset:8 -; GFX11-NEXT: scratch_load_b32 v174, off, s32 offset:12 -; GFX11-NEXT: scratch_load_b32 v173, off, s32 offset:16 -; GFX11-NEXT: scratch_load_b32 v172, off, s32 offset:20 -; GFX11-NEXT: scratch_load_b32 v171, off, s32 offset:24 -; GFX11-NEXT: scratch_load_b32 v170, off, s32 offset:28 -; GFX11-NEXT: scratch_load_b32 v169, off, s32 offset:32 -; GFX11-NEXT: scratch_load_b32 v168, off, s32 offset:36 -; GFX11-NEXT: scratch_load_b32 v159, off, s32 offset:40 -; GFX11-NEXT: scratch_load_b32 v158, off, s32 offset:44 -; GFX11-NEXT: scratch_load_b32 v157, off, s32 offset:48 -; GFX11-NEXT: scratch_load_b32 v156, off, s32 offset:52 -; GFX11-NEXT: scratch_load_b32 v155, off, s32 offset:56 -; GFX11-NEXT: scratch_load_b32 v154, off, s32 offset:60 -; GFX11-NEXT: scratch_load_b32 v153, off, s32 offset:64 -; GFX11-NEXT: scratch_load_b32 v152, off, s32 offset:68 -; GFX11-NEXT: scratch_load_b32 v143, off, s32 offset:72 -; GFX11-NEXT: scratch_load_b32 v142, off, s32 offset:76 -; GFX11-NEXT: scratch_load_b32 v141, off, s32 offset:80 -; GFX11-NEXT: scratch_load_b32 v140, off, s32 offset:84 -; GFX11-NEXT: scratch_load_b32 v139, off, s32 offset:88 -; GFX11-NEXT: scratch_load_b32 v138, off, s32 offset:92 -; GFX11-NEXT: scratch_load_b32 v137, off, s32 offset:96 -; GFX11-NEXT: scratch_load_b32 v136, off, s32 offset:100 -; GFX11-NEXT: scratch_load_b32 v127, off, s32 offset:104 -; GFX11-NEXT: scratch_load_b32 v126, off, s32 offset:108 -; GFX11-NEXT: scratch_load_b32 v125, off, s32 offset:112 -; GFX11-NEXT: scratch_load_b32 v124, off, s32 offset:116 -; GFX11-NEXT: scratch_load_b32 v123, off, s32 offset:120 -; GFX11-NEXT: scratch_load_b32 v122, off, s32 offset:124 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v121, off, s32 offset:128 -; GFX11-NEXT: scratch_load_b32 v120, off, s32 offset:132 -; GFX11-NEXT: scratch_load_b32 v111, off, s32 offset:136 -; GFX11-NEXT: scratch_load_b32 v110, off, s32 offset:140 -; GFX11-NEXT: scratch_load_b32 v109, off, s32 offset:144 -; GFX11-NEXT: scratch_load_b32 v108, off, s32 offset:148 -; GFX11-NEXT: scratch_load_b32 v107, off, s32 offset:152 -; GFX11-NEXT: scratch_load_b32 v106, off, s32 offset:156 -; GFX11-NEXT: scratch_load_b32 v105, off, s32 offset:160 -; GFX11-NEXT: scratch_load_b32 v104, off, s32 offset:164 -; GFX11-NEXT: scratch_load_b32 v95, off, s32 offset:168 -; GFX11-NEXT: scratch_load_b32 v94, off, s32 offset:172 -; GFX11-NEXT: scratch_load_b32 v93, off, s32 offset:176 -; GFX11-NEXT: scratch_load_b32 v92, off, s32 offset:180 -; GFX11-NEXT: scratch_load_b32 v91, off, s32 offset:184 -; GFX11-NEXT: scratch_load_b32 v90, off, s32 offset:188 -; GFX11-NEXT: scratch_load_b32 v89, off, s32 offset:192 -; GFX11-NEXT: scratch_load_b32 v88, off, s32 offset:196 -; GFX11-NEXT: scratch_load_b32 v79, off, s32 offset:200 -; GFX11-NEXT: scratch_load_b32 v78, off, s32 offset:204 -; GFX11-NEXT: scratch_load_b32 v77, off, s32 offset:208 -; GFX11-NEXT: scratch_load_b32 v76, off, s32 offset:212 -; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:216 -; GFX11-NEXT: scratch_load_b32 v74, off, s32 offset:220 -; GFX11-NEXT: scratch_load_b32 v73, off, s32 offset:224 -; GFX11-NEXT: scratch_load_b32 v72, off, s32 offset:228 -; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:232 -; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:236 -; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:240 -; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:244 -; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:248 -; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:252 -; GFX11-NEXT: s_clause 0x9 ; 40-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:256 -; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:260 -; GFX11-NEXT: scratch_load_b32 v47, off, s32 offset:264 -; GFX11-NEXT: scratch_load_b32 v46, off, s32 offset:268 -; GFX11-NEXT: scratch_load_b32 v45, off, s32 offset:272 -; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:276 -; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:280 -; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:284 -; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:288 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:292 -; GFX11-NEXT: v_dual_mov_b32 v2, v52 :: v_dual_mov_b32 v5, v67 -; GFX11-NEXT: v_dual_mov_b32 v10, v112 :: v_dual_mov_b32 v13, v151 -; GFX11-NEXT: v_dual_mov_b32 v25, v182 :: v_dual_mov_b32 v26, v181 -; GFX11-NEXT: v_dual_mov_b32 v27, v180 :: v_dual_mov_b32 v28, v179 -; GFX11-NEXT: v_dual_mov_b32 v29, v178 :: v_dual_mov_b32 v30, v177 -; GFX11-NEXT: v_mov_b32_e32 v31, v176 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; GFX11-NEXT: .LBB83_4: -; GFX11-NEXT: ; implicit-def: $vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78 -; GFX11-NEXT: ; implicit-def: $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79 -; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-NEXT: ; implicit-def: $vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81 -; GFX11-NEXT: ; implicit-def: $vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84 -; GFX11-NEXT: ; implicit-def: $vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88 -; GFX11-NEXT: ; implicit-def: $vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93 -; GFX11-NEXT: ; implicit-def: $vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99 -; GFX11-NEXT: ; implicit-def: $vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106 -; GFX11-NEXT: ; implicit-def: $vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114 -; GFX11-NEXT: ; implicit-def: $vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123 -; GFX11-NEXT: ; implicit-def: $vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133 -; GFX11-NEXT: ; implicit-def: $vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144 -; GFX11-NEXT: ; implicit-def: $vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156 -; GFX11-NEXT: ; implicit-def: $vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169 +; GFX11-NEXT: ; implicit-def: $vgpr0 +; GFX11-NEXT: ; implicit-def: $vgpr1 +; GFX11-NEXT: ; implicit-def: $vgpr2 +; GFX11-NEXT: ; implicit-def: $vgpr3 +; GFX11-NEXT: ; implicit-def: $vgpr4 +; GFX11-NEXT: ; implicit-def: $vgpr5 +; GFX11-NEXT: ; implicit-def: $vgpr6 +; GFX11-NEXT: ; implicit-def: $vgpr7 +; GFX11-NEXT: ; implicit-def: $vgpr8 +; GFX11-NEXT: ; implicit-def: $vgpr9 +; GFX11-NEXT: ; implicit-def: $vgpr10 +; GFX11-NEXT: ; implicit-def: $vgpr11 +; GFX11-NEXT: ; implicit-def: $vgpr12 +; GFX11-NEXT: ; implicit-def: $vgpr13 +; GFX11-NEXT: ; implicit-def: $vgpr14 +; GFX11-NEXT: ; implicit-def: $vgpr15 ; GFX11-NEXT: s_branch .LBB83_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false @@ -147375,252 +144331,80 @@ define inreg <16 x double> @bitcast_v64i16_to_v16f64_scalar(<64 x i16> inreg %a, ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:292 -; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:288 -; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:284 -; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:280 -; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:276 -; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:272 -; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:268 -; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:264 -; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:260 -; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:256 -; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:252 -; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:248 -; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:244 -; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:240 -; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:236 -; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:232 -; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:228 -; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:224 -; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:220 -; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:216 -; GFX11-NEXT: scratch_store_b32 off, v76, s32 offset:212 -; GFX11-NEXT: scratch_store_b32 off, v77, s32 offset:208 -; GFX11-NEXT: scratch_store_b32 off, v78, s32 offset:204 -; GFX11-NEXT: scratch_store_b32 off, v79, s32 offset:200 -; GFX11-NEXT: scratch_store_b32 off, v88, s32 offset:196 -; GFX11-NEXT: scratch_store_b32 off, v89, s32 offset:192 -; GFX11-NEXT: scratch_store_b32 off, v90, s32 offset:188 -; GFX11-NEXT: scratch_store_b32 off, v91, s32 offset:184 -; GFX11-NEXT: scratch_store_b32 off, v92, s32 offset:180 -; GFX11-NEXT: scratch_store_b32 off, v93, s32 offset:176 -; GFX11-NEXT: scratch_store_b32 off, v94, s32 offset:172 -; GFX11-NEXT: scratch_store_b32 off, v95, s32 offset:168 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v104, s32 offset:164 -; GFX11-NEXT: scratch_store_b32 off, v105, s32 offset:160 -; GFX11-NEXT: scratch_store_b32 off, v106, s32 offset:156 -; GFX11-NEXT: scratch_store_b32 off, v107, s32 offset:152 -; GFX11-NEXT: scratch_store_b32 off, v108, s32 offset:148 -; GFX11-NEXT: scratch_store_b32 off, v109, s32 offset:144 -; GFX11-NEXT: scratch_store_b32 off, v110, s32 offset:140 -; GFX11-NEXT: scratch_store_b32 off, v111, s32 offset:136 -; GFX11-NEXT: scratch_store_b32 off, v120, s32 offset:132 -; GFX11-NEXT: scratch_store_b32 off, v121, s32 offset:128 -; GFX11-NEXT: scratch_store_b32 off, v122, s32 offset:124 -; GFX11-NEXT: scratch_store_b32 off, v123, s32 offset:120 -; GFX11-NEXT: scratch_store_b32 off, v124, s32 offset:116 -; GFX11-NEXT: scratch_store_b32 off, v125, s32 offset:112 -; GFX11-NEXT: scratch_store_b32 off, v126, s32 offset:108 -; GFX11-NEXT: scratch_store_b32 off, v127, s32 offset:104 -; GFX11-NEXT: scratch_store_b32 off, v136, s32 offset:100 -; GFX11-NEXT: scratch_store_b32 off, v137, s32 offset:96 -; GFX11-NEXT: scratch_store_b32 off, v138, s32 offset:92 -; GFX11-NEXT: scratch_store_b32 off, v139, s32 offset:88 -; GFX11-NEXT: scratch_store_b32 off, v140, s32 offset:84 -; GFX11-NEXT: scratch_store_b32 off, v141, s32 offset:80 -; GFX11-NEXT: scratch_store_b32 off, v142, s32 offset:76 -; GFX11-NEXT: scratch_store_b32 off, v143, s32 offset:72 -; GFX11-NEXT: scratch_store_b32 off, v152, s32 offset:68 -; GFX11-NEXT: scratch_store_b32 off, v153, s32 offset:64 -; GFX11-NEXT: scratch_store_b32 off, v154, s32 offset:60 -; GFX11-NEXT: scratch_store_b32 off, v155, s32 offset:56 -; GFX11-NEXT: scratch_store_b32 off, v156, s32 offset:52 -; GFX11-NEXT: scratch_store_b32 off, v157, s32 offset:48 -; GFX11-NEXT: scratch_store_b32 off, v158, s32 offset:44 -; GFX11-NEXT: scratch_store_b32 off, v159, s32 offset:40 -; GFX11-NEXT: s_clause 0x9 ; 40-byte Folded Spill -; GFX11-NEXT: scratch_store_b32 off, v168, s32 offset:36 -; GFX11-NEXT: scratch_store_b32 off, v169, s32 offset:32 -; GFX11-NEXT: scratch_store_b32 off, v170, s32 offset:28 -; GFX11-NEXT: scratch_store_b32 off, v171, s32 offset:24 -; GFX11-NEXT: scratch_store_b32 off, v172, s32 offset:20 -; GFX11-NEXT: scratch_store_b32 off, v173, s32 offset:16 -; GFX11-NEXT: scratch_store_b32 off, v174, s32 offset:12 -; GFX11-NEXT: scratch_store_b32 off, v175, s32 offset:8 -; GFX11-NEXT: scratch_store_b32 off, v184, s32 offset:4 -; GFX11-NEXT: scratch_store_b32 off, v185, s32 -; GFX11-NEXT: v_dual_mov_b32 v176, v13 :: v_dual_mov_b32 v177, v12 -; GFX11-NEXT: v_dual_mov_b32 v178, v11 :: v_dual_mov_b32 v179, v10 -; GFX11-NEXT: v_dual_mov_b32 v180, v9 :: v_dual_mov_b32 v181, v8 -; GFX11-NEXT: v_dual_mov_b32 v182, v7 :: v_dual_mov_b32 v183, v6 -; GFX11-NEXT: v_dual_mov_b32 v170, v5 :: v_dual_mov_b32 v171, v4 -; GFX11-NEXT: v_dual_mov_b32 v172, v3 :: v_dual_mov_b32 v173, v2 -; GFX11-NEXT: v_dual_mov_b32 v174, v1 :: v_dual_mov_b32 v175, v0 -; GFX11-NEXT: v_dual_mov_b32 v184, s28 :: v_dual_mov_b32 v185, s29 +; GFX11-NEXT: v_dual_mov_b32 v31, v13 :: v_dual_mov_b32 v30, v12 +; GFX11-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 +; GFX11-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-NEXT: v_dual_mov_b32 v16, s28 :: v_dual_mov_b32 v17, s29 ; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: s_and_b32 s5, vcc_lo, exec_lo ; GFX11-NEXT: s_cbranch_scc0 .LBB87_4 ; GFX11-NEXT: ; %bb.1: ; %cmp.false -; GFX11-NEXT: v_dual_mov_b32 v47, s0 :: v_dual_mov_b32 v52, s2 -; GFX11-NEXT: v_dual_mov_b32 v49, s1 :: v_dual_mov_b32 v56, s3 -; GFX11-NEXT: v_dual_mov_b32 v61, s16 :: v_dual_mov_b32 v74, s18 -; GFX11-NEXT: v_dual_mov_b32 v67, s17 :: v_dual_mov_b32 v82, s19 -; GFX11-NEXT: v_dual_mov_b32 v91, s20 :: v_dual_mov_b32 v112, s22 -; GFX11-NEXT: v_dual_mov_b32 v101, s21 :: v_dual_mov_b32 v124, s23 -; GFX11-NEXT: v_dual_mov_b32 v137, s24 :: v_dual_mov_b32 v14, s26 -; GFX11-NEXT: v_dual_mov_b32 v151, s25 :: v_dual_mov_b32 v30, s27 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17 +; GFX11-NEXT: v_dual_mov_b32 v6, s18 :: v_dual_mov_b32 v7, s19 +; GFX11-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 +; GFX11-NEXT: v_dual_mov_b32 v10, s22 :: v_dual_mov_b32 v11, s23 +; GFX11-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25 +; GFX11-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27 ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s4 ; GFX11-NEXT: s_cbranch_vccnz .LBB87_3 ; GFX11-NEXT: .LBB87_2: ; %cmp.true -; GFX11-NEXT: v_pk_add_u16 v30, s27, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v15, s27, 3 op_sel_hi:[1,0] ; GFX11-NEXT: v_pk_add_u16 v14, s26, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v176, v176, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v177, v177, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v178, v178, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v179, v179, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v180, v180, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v181, v181, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v182, v182, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v183, v183, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v170, v170, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v171, v171, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v172, v172, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v173, v173, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v174, v174, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v175, v175, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v184, v184, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v151, s25, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v137, s24, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v124, s23, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v112, s22, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v101, s21, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v91, s20, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v82, s19, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v74, s18, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v67, s17, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v61, s16, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v56, s3, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v52, s2, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v49, s1, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v47, s0, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v13, s25, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v12, s24, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v11, s23, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v10, s22, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v9, s21, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v8, s20, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v7, s19, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v6, s18, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v5, s17, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v4, s16, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v31, v31, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v30, v30, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v29, v29, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v28, v28, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v27, v27, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v26, v26, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v25, v25, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v24, v24, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v17, v17, 3 op_sel_hi:[1,0] +; GFX11-NEXT: v_pk_add_u16 v16, v16, 3 op_sel_hi:[1,0] ; GFX11-NEXT: .LBB87_3: ; %end -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_dual_mov_b32 v0, v47 :: v_dual_mov_b32 v1, v49 -; GFX11-NEXT: v_dual_mov_b32 v3, v56 :: v_dual_mov_b32 v4, v61 -; GFX11-NEXT: v_dual_mov_b32 v6, v74 :: v_dual_mov_b32 v9, v101 -; GFX11-NEXT: v_dual_mov_b32 v7, v82 :: v_dual_mov_b32 v8, v91 -; GFX11-NEXT: v_dual_mov_b32 v11, v124 :: v_dual_mov_b32 v12, v137 -; GFX11-NEXT: v_dual_mov_b32 v15, v30 :: v_dual_mov_b32 v16, v184 -; GFX11-NEXT: v_dual_mov_b32 v17, v185 :: v_dual_mov_b32 v18, v175 -; GFX11-NEXT: v_dual_mov_b32 v19, v174 :: v_dual_mov_b32 v20, v173 -; GFX11-NEXT: v_dual_mov_b32 v21, v172 :: v_dual_mov_b32 v22, v171 -; GFX11-NEXT: v_dual_mov_b32 v23, v170 :: v_dual_mov_b32 v24, v183 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v185, off, s32 -; GFX11-NEXT: scratch_load_b32 v184, off, s32 offset:4 -; GFX11-NEXT: scratch_load_b32 v175, off, s32 offset:8 -; GFX11-NEXT: scratch_load_b32 v174, off, s32 offset:12 -; GFX11-NEXT: scratch_load_b32 v173, off, s32 offset:16 -; GFX11-NEXT: scratch_load_b32 v172, off, s32 offset:20 -; GFX11-NEXT: scratch_load_b32 v171, off, s32 offset:24 -; GFX11-NEXT: scratch_load_b32 v170, off, s32 offset:28 -; GFX11-NEXT: scratch_load_b32 v169, off, s32 offset:32 -; GFX11-NEXT: scratch_load_b32 v168, off, s32 offset:36 -; GFX11-NEXT: scratch_load_b32 v159, off, s32 offset:40 -; GFX11-NEXT: scratch_load_b32 v158, off, s32 offset:44 -; GFX11-NEXT: scratch_load_b32 v157, off, s32 offset:48 -; GFX11-NEXT: scratch_load_b32 v156, off, s32 offset:52 -; GFX11-NEXT: scratch_load_b32 v155, off, s32 offset:56 -; GFX11-NEXT: scratch_load_b32 v154, off, s32 offset:60 -; GFX11-NEXT: scratch_load_b32 v153, off, s32 offset:64 -; GFX11-NEXT: scratch_load_b32 v152, off, s32 offset:68 -; GFX11-NEXT: scratch_load_b32 v143, off, s32 offset:72 -; GFX11-NEXT: scratch_load_b32 v142, off, s32 offset:76 -; GFX11-NEXT: scratch_load_b32 v141, off, s32 offset:80 -; GFX11-NEXT: scratch_load_b32 v140, off, s32 offset:84 -; GFX11-NEXT: scratch_load_b32 v139, off, s32 offset:88 -; GFX11-NEXT: scratch_load_b32 v138, off, s32 offset:92 -; GFX11-NEXT: scratch_load_b32 v137, off, s32 offset:96 -; GFX11-NEXT: scratch_load_b32 v136, off, s32 offset:100 -; GFX11-NEXT: scratch_load_b32 v127, off, s32 offset:104 -; GFX11-NEXT: scratch_load_b32 v126, off, s32 offset:108 -; GFX11-NEXT: scratch_load_b32 v125, off, s32 offset:112 -; GFX11-NEXT: scratch_load_b32 v124, off, s32 offset:116 -; GFX11-NEXT: scratch_load_b32 v123, off, s32 offset:120 -; GFX11-NEXT: scratch_load_b32 v122, off, s32 offset:124 -; GFX11-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v121, off, s32 offset:128 -; GFX11-NEXT: scratch_load_b32 v120, off, s32 offset:132 -; GFX11-NEXT: scratch_load_b32 v111, off, s32 offset:136 -; GFX11-NEXT: scratch_load_b32 v110, off, s32 offset:140 -; GFX11-NEXT: scratch_load_b32 v109, off, s32 offset:144 -; GFX11-NEXT: scratch_load_b32 v108, off, s32 offset:148 -; GFX11-NEXT: scratch_load_b32 v107, off, s32 offset:152 -; GFX11-NEXT: scratch_load_b32 v106, off, s32 offset:156 -; GFX11-NEXT: scratch_load_b32 v105, off, s32 offset:160 -; GFX11-NEXT: scratch_load_b32 v104, off, s32 offset:164 -; GFX11-NEXT: scratch_load_b32 v95, off, s32 offset:168 -; GFX11-NEXT: scratch_load_b32 v94, off, s32 offset:172 -; GFX11-NEXT: scratch_load_b32 v93, off, s32 offset:176 -; GFX11-NEXT: scratch_load_b32 v92, off, s32 offset:180 -; GFX11-NEXT: scratch_load_b32 v91, off, s32 offset:184 -; GFX11-NEXT: scratch_load_b32 v90, off, s32 offset:188 -; GFX11-NEXT: scratch_load_b32 v89, off, s32 offset:192 -; GFX11-NEXT: scratch_load_b32 v88, off, s32 offset:196 -; GFX11-NEXT: scratch_load_b32 v79, off, s32 offset:200 -; GFX11-NEXT: scratch_load_b32 v78, off, s32 offset:204 -; GFX11-NEXT: scratch_load_b32 v77, off, s32 offset:208 -; GFX11-NEXT: scratch_load_b32 v76, off, s32 offset:212 -; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:216 -; GFX11-NEXT: scratch_load_b32 v74, off, s32 offset:220 -; GFX11-NEXT: scratch_load_b32 v73, off, s32 offset:224 -; GFX11-NEXT: scratch_load_b32 v72, off, s32 offset:228 -; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:232 -; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:236 -; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:240 -; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:244 -; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:248 -; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:252 -; GFX11-NEXT: s_clause 0x9 ; 40-byte Folded Reload -; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:256 -; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:260 -; GFX11-NEXT: scratch_load_b32 v47, off, s32 offset:264 -; GFX11-NEXT: scratch_load_b32 v46, off, s32 offset:268 -; GFX11-NEXT: scratch_load_b32 v45, off, s32 offset:272 -; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:276 -; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:280 -; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:284 -; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:288 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:292 -; GFX11-NEXT: v_dual_mov_b32 v2, v52 :: v_dual_mov_b32 v5, v67 -; GFX11-NEXT: v_dual_mov_b32 v10, v112 :: v_dual_mov_b32 v13, v151 -; GFX11-NEXT: v_dual_mov_b32 v25, v182 :: v_dual_mov_b32 v26, v181 -; GFX11-NEXT: v_dual_mov_b32 v27, v180 :: v_dual_mov_b32 v28, v179 -; GFX11-NEXT: v_dual_mov_b32 v29, v178 :: v_dual_mov_b32 v30, v177 -; GFX11-NEXT: v_mov_b32_e32 v31, v176 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; GFX11-NEXT: .LBB87_4: -; GFX11-NEXT: ; implicit-def: $vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78 -; GFX11-NEXT: ; implicit-def: $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79 -; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-NEXT: ; implicit-def: $vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81 -; GFX11-NEXT: ; implicit-def: $vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84 -; GFX11-NEXT: ; implicit-def: $vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88 -; GFX11-NEXT: ; implicit-def: $vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93 -; GFX11-NEXT: ; implicit-def: $vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99 -; GFX11-NEXT: ; implicit-def: $vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106 -; GFX11-NEXT: ; implicit-def: $vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114 -; GFX11-NEXT: ; implicit-def: $vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123 -; GFX11-NEXT: ; implicit-def: $vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133 -; GFX11-NEXT: ; implicit-def: $vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144 -; GFX11-NEXT: ; implicit-def: $vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156 -; GFX11-NEXT: ; implicit-def: $vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169 +; GFX11-NEXT: ; implicit-def: $vgpr0 +; GFX11-NEXT: ; implicit-def: $vgpr1 +; GFX11-NEXT: ; implicit-def: $vgpr2 +; GFX11-NEXT: ; implicit-def: $vgpr3 +; GFX11-NEXT: ; implicit-def: $vgpr4 +; GFX11-NEXT: ; implicit-def: $vgpr5 +; GFX11-NEXT: ; implicit-def: $vgpr6 +; GFX11-NEXT: ; implicit-def: $vgpr7 +; GFX11-NEXT: ; implicit-def: $vgpr8 +; GFX11-NEXT: ; implicit-def: $vgpr9 +; GFX11-NEXT: ; implicit-def: $vgpr10 +; GFX11-NEXT: ; implicit-def: $vgpr11 +; GFX11-NEXT: ; implicit-def: $vgpr12 +; GFX11-NEXT: ; implicit-def: $vgpr13 +; GFX11-NEXT: ; implicit-def: $vgpr14 +; GFX11-NEXT: ; implicit-def: $vgpr15 ; GFX11-NEXT: s_branch .LBB87_2 %cmp = icmp eq i32 %b, 0 br i1 %cmp, label %cmp.true, label %cmp.false diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll index 8964ebd9cbd70..889e52cc09e2b 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll @@ -4911,269 +4911,100 @@ define inreg <20 x i32> @bitcast_v40i16_to_v20i32_scalar(<40 x i16> inreg %a, i3 ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 -; GFX11-TRUE16-NEXT: s_clause 0xa ; 44-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB15_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB15_3 ; GFX11-TRUE16-NEXT: .LBB15_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB15_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v17, v170 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v186 :: v_dual_mov_b32 v19, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xa ; 44-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:296 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB15_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB15_2 ; ; GFX11-FAKE16-LABEL: bitcast_v40i16_to_v20i32_scalar: @@ -8517,269 +8348,100 @@ define inreg <20 x i32> @bitcast_v40f16_to_v20i32_scalar(<40 x half> inreg %a, i ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 -; GFX11-TRUE16-NEXT: s_clause 0xa ; 44-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB19_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB19_3 ; GFX11-TRUE16-NEXT: .LBB19_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB19_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v17, v170 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v186 :: v_dual_mov_b32 v19, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xa ; 44-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:296 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB19_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB19_2 ; ; GFX11-FAKE16-LABEL: bitcast_v40f16_to_v20i32_scalar: @@ -13107,269 +12769,100 @@ define inreg <20 x float> @bitcast_v40i16_to_v20f32_scalar(<40 x i16> inreg %a, ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 -; GFX11-TRUE16-NEXT: s_clause 0xa ; 44-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB31_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB31_3 ; GFX11-TRUE16-NEXT: .LBB31_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB31_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v17, v170 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v186 :: v_dual_mov_b32 v19, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xa ; 44-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:296 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB31_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB31_2 ; ; GFX11-FAKE16-LABEL: bitcast_v40i16_to_v20f32_scalar: @@ -16827,269 +16320,100 @@ define inreg <20 x float> @bitcast_v40f16_to_v20f32_scalar(<40 x half> inreg %a, ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 -; GFX11-TRUE16-NEXT: s_clause 0xa ; 44-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB35_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB35_3 ; GFX11-TRUE16-NEXT: .LBB35_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB35_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v17, v170 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v186 :: v_dual_mov_b32 v19, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xa ; 44-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:296 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB35_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB35_2 ; ; GFX11-FAKE16-LABEL: bitcast_v40f16_to_v20f32_scalar: @@ -20613,269 +19937,100 @@ define inreg <10 x i64> @bitcast_v40i16_to_v10i64_scalar(<40 x i16> inreg %a, i3 ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 -; GFX11-TRUE16-NEXT: s_clause 0xa ; 44-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB43_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB43_3 ; GFX11-TRUE16-NEXT: .LBB43_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB43_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v17, v170 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v186 :: v_dual_mov_b32 v19, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xa ; 44-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:296 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB43_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB43_2 ; ; GFX11-FAKE16-LABEL: bitcast_v40i16_to_v10i64_scalar: @@ -24229,269 +23384,100 @@ define inreg <10 x i64> @bitcast_v40f16_to_v10i64_scalar(<40 x half> inreg %a, i ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 -; GFX11-TRUE16-NEXT: s_clause 0xa ; 44-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB47_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB47_3 ; GFX11-TRUE16-NEXT: .LBB47_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB47_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v17, v170 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v186 :: v_dual_mov_b32 v19, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xa ; 44-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:296 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB47_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB47_2 ; ; GFX11-FAKE16-LABEL: bitcast_v40f16_to_v10i64_scalar: @@ -27349,269 +26335,100 @@ define inreg <10 x double> @bitcast_v40i16_to_v10f64_scalar(<40 x i16> inreg %a, ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 -; GFX11-TRUE16-NEXT: s_clause 0xa ; 44-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB51_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB51_3 ; GFX11-TRUE16-NEXT: .LBB51_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB51_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v17, v170 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v186 :: v_dual_mov_b32 v19, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xa ; 44-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:296 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB51_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB51_2 ; ; GFX11-FAKE16-LABEL: bitcast_v40i16_to_v10f64_scalar: @@ -31002,269 +29819,100 @@ define inreg <10 x double> @bitcast_v40f16_to_v10f64_scalar(<40 x half> inreg %a ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:172 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:44 -; GFX11-TRUE16-NEXT: s_clause 0xa ; 44-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v1 :: v_dual_mov_b32 v186, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB55_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB55_3 ; GFX11-TRUE16-NEXT: .LBB55_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB55_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v17, v170 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v186 :: v_dual_mov_b32 v19, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xa ; 44-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:296 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB55_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB55_2 ; ; GFX11-FAKE16-LABEL: bitcast_v40f16_to_v10f64_scalar: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll index ed407c1e20c14..04eb5706d503b 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll @@ -5328,277 +5328,103 @@ define inreg <22 x i32> @bitcast_v44i16_to_v22i32_scalar(<44 x i16> inreg %a, i3 ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 -; GFX11-TRUE16-NEXT: s_clause 0xc ; 52-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB15_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB15_3 ; GFX11-TRUE16-NEXT: .LBB15_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB15_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v187 :: v_dual_mov_b32 v20, v186 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v21, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xc ; 52-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:304 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB15_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB15_2 ; ; GFX11-FAKE16-LABEL: bitcast_v44i16_to_v22i32_scalar: @@ -9310,277 +9136,103 @@ define inreg <22 x i32> @bitcast_v44f16_to_v22i32_scalar(<44 x half> inreg %a, i ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 -; GFX11-TRUE16-NEXT: s_clause 0xc ; 52-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB19_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB19_3 ; GFX11-TRUE16-NEXT: .LBB19_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB19_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v187 :: v_dual_mov_b32 v20, v186 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v21, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xc ; 52-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:304 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB19_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB19_2 ; ; GFX11-FAKE16-LABEL: bitcast_v44f16_to_v22i32_scalar: @@ -14290,277 +13942,103 @@ define inreg <22 x float> @bitcast_v44i16_to_v22f32_scalar(<44 x i16> inreg %a, ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 -; GFX11-TRUE16-NEXT: s_clause 0xc ; 52-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB31_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB31_3 ; GFX11-TRUE16-NEXT: .LBB31_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB31_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v187 :: v_dual_mov_b32 v20, v186 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v21, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xc ; 52-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:304 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB31_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB31_2 ; ; GFX11-FAKE16-LABEL: bitcast_v44i16_to_v22f32_scalar: @@ -18405,277 +17883,103 @@ define inreg <22 x float> @bitcast_v44f16_to_v22f32_scalar(<44 x half> inreg %a, ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 -; GFX11-TRUE16-NEXT: s_clause 0xc ; 52-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB35_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB35_3 ; GFX11-TRUE16-NEXT: .LBB35_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB35_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v187 :: v_dual_mov_b32 v20, v186 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v21, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xc ; 52-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:304 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB35_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB35_2 ; ; GFX11-FAKE16-LABEL: bitcast_v44f16_to_v22f32_scalar: @@ -22538,277 +21842,103 @@ define inreg <11 x i64> @bitcast_v44i16_to_v11i64_scalar(<44 x i16> inreg %a, i3 ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 -; GFX11-TRUE16-NEXT: s_clause 0xc ; 52-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB43_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB43_3 ; GFX11-TRUE16-NEXT: .LBB43_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB43_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v187 :: v_dual_mov_b32 v20, v186 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v21, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xc ; 52-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:304 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB43_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB43_2 ; ; GFX11-FAKE16-LABEL: bitcast_v44i16_to_v11i64_scalar: @@ -26532,277 +25662,103 @@ define inreg <11 x i64> @bitcast_v44f16_to_v11i64_scalar(<44 x half> inreg %a, i ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 -; GFX11-TRUE16-NEXT: s_clause 0xc ; 52-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB47_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB47_3 ; GFX11-TRUE16-NEXT: .LBB47_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB47_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v187 :: v_dual_mov_b32 v20, v186 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v21, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xc ; 52-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:304 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB47_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB47_2 ; ; GFX11-FAKE16-LABEL: bitcast_v44f16_to_v11i64_scalar: @@ -29953,277 +28909,103 @@ define inreg <11 x double> @bitcast_v44i16_to_v11f64_scalar(<44 x i16> inreg %a, ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 -; GFX11-TRUE16-NEXT: s_clause 0xc ; 52-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB51_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB51_3 ; GFX11-TRUE16-NEXT: .LBB51_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB51_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v187 :: v_dual_mov_b32 v20, v186 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v21, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xc ; 52-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:304 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB51_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB51_2 ; ; GFX11-FAKE16-LABEL: bitcast_v44i16_to_v11f64_scalar: @@ -33992,277 +32774,103 @@ define inreg <11 x double> @bitcast_v44f16_to_v11f64_scalar(<44 x half> inreg %a ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:180 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:52 -; GFX11-TRUE16-NEXT: s_clause 0xc ; 52-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v3 :: v_dual_mov_b32 v186, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v1 :: v_dual_mov_b32 v188, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB55_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB55_3 ; GFX11-TRUE16-NEXT: .LBB55_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB55_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v187 :: v_dual_mov_b32 v20, v186 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v21, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xc ; 52-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:304 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB55_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB55_2 ; ; GFX11-FAKE16-LABEL: bitcast_v44f16_to_v11f64_scalar: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll index 9ec3f5c00ee23..bb3fa73a2ecb5 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll @@ -5799,285 +5799,106 @@ define inreg <24 x i32> @bitcast_v48i16_to_v24i32_scalar(<48 x i16> inreg %a, i3 ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 -; GFX11-TRUE16-NEXT: s_clause 0xe ; 60-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB15_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB15_3 ; GFX11-TRUE16-NEXT: .LBB15_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v190, v190, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v189, v189, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB15_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v20, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v190 :: v_dual_mov_b32 v19, v189 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v187 :: v_dual_mov_b32 v22, v186 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v23, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xe ; 60-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:312 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB15_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB15_2 ; ; GFX11-FAKE16-LABEL: bitcast_v48i16_to_v24i32_scalar: @@ -10207,285 +10028,106 @@ define inreg <24 x i32> @bitcast_v48f16_to_v24i32_scalar(<48 x half> inreg %a, i ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 -; GFX11-TRUE16-NEXT: s_clause 0xe ; 60-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB19_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB19_3 ; GFX11-TRUE16-NEXT: .LBB19_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v190, 0x200, v190 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v189, 0x200, v189 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB19_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v20, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v190 :: v_dual_mov_b32 v19, v189 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v187 :: v_dual_mov_b32 v22, v186 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v23, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xe ; 60-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:312 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB19_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB19_2 ; ; GFX11-FAKE16-LABEL: bitcast_v48f16_to_v24i32_scalar: @@ -15629,285 +15271,106 @@ define inreg <24 x float> @bitcast_v48i16_to_v24f32_scalar(<48 x i16> inreg %a, ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 -; GFX11-TRUE16-NEXT: s_clause 0xe ; 60-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB31_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB31_3 ; GFX11-TRUE16-NEXT: .LBB31_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v190, v190, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v189, v189, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB31_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v20, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v190 :: v_dual_mov_b32 v19, v189 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v187 :: v_dual_mov_b32 v22, v186 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v23, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xe ; 60-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:312 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB31_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB31_2 ; ; GFX11-FAKE16-LABEL: bitcast_v48i16_to_v24f32_scalar: @@ -20178,285 +19641,106 @@ define inreg <24 x float> @bitcast_v48f16_to_v24f32_scalar(<48 x half> inreg %a, ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 -; GFX11-TRUE16-NEXT: s_clause 0xe ; 60-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB35_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB35_3 ; GFX11-TRUE16-NEXT: .LBB35_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v190, 0x200, v190 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v189, 0x200, v189 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB35_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v20, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v190 :: v_dual_mov_b32 v19, v189 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v187 :: v_dual_mov_b32 v22, v186 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v23, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xe ; 60-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:312 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB35_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB35_2 ; ; GFX11-FAKE16-LABEL: bitcast_v48f16_to_v24f32_scalar: @@ -24722,285 +24006,106 @@ define inreg <12 x i64> @bitcast_v48i16_to_v12i64_scalar(<48 x i16> inreg %a, i3 ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 -; GFX11-TRUE16-NEXT: s_clause 0xe ; 60-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB43_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB43_3 ; GFX11-TRUE16-NEXT: .LBB43_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v190, v190, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v189, v189, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB43_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v20, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v190 :: v_dual_mov_b32 v19, v189 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v187 :: v_dual_mov_b32 v22, v186 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v23, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xe ; 60-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:312 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB43_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB43_2 ; ; GFX11-FAKE16-LABEL: bitcast_v48i16_to_v12i64_scalar: @@ -29142,285 +28247,106 @@ define inreg <12 x i64> @bitcast_v48f16_to_v12i64_scalar(<48 x half> inreg %a, i ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 -; GFX11-TRUE16-NEXT: s_clause 0xe ; 60-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB47_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB47_3 ; GFX11-TRUE16-NEXT: .LBB47_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v190, 0x200, v190 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v189, 0x200, v189 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB47_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v20, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v190 :: v_dual_mov_b32 v19, v189 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v187 :: v_dual_mov_b32 v22, v186 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v23, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xe ; 60-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:312 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB47_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB47_2 ; ; GFX11-FAKE16-LABEL: bitcast_v48f16_to_v12i64_scalar: @@ -32932,285 +31858,106 @@ define inreg <12 x double> @bitcast_v48i16_to_v12f64_scalar(<48 x i16> inreg %a, ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 -; GFX11-TRUE16-NEXT: s_clause 0xe ; 60-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB51_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB51_3 ; GFX11-TRUE16-NEXT: .LBB51_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v190, v190, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v189, v189, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB51_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v20, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v190 :: v_dual_mov_b32 v19, v189 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v187 :: v_dual_mov_b32 v22, v186 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v23, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xe ; 60-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:312 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB51_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB51_2 ; ; GFX11-FAKE16-LABEL: bitcast_v48i16_to_v12f64_scalar: @@ -37399,285 +36146,106 @@ define inreg <12 x double> @bitcast_v48f16_to_v12f64_scalar(<48 x half> inreg %a ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:188 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:60 -; GFX11-TRUE16-NEXT: s_clause 0xe ; 60-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v185, v5 :: v_dual_mov_b32 v186, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v3 :: v_dual_mov_b32 v188, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v1 :: v_dual_mov_b32 v190, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB55_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB55_3 ; GFX11-TRUE16-NEXT: .LBB55_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v190, 0x200, v190 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v189, 0x200, v189 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB55_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v20, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v18, v190 :: v_dual_mov_b32 v19, v189 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v187 :: v_dual_mov_b32 v22, v186 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v23, v185 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xe ; 60-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:312 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB55_4: -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB55_2 ; ; GFX11-FAKE16-LABEL: bitcast_v48f16_to_v12f64_scalar: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll index c7a199328012d..7e5c8486fa2dd 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll @@ -6282,294 +6282,109 @@ define inreg <26 x i32> @bitcast_v52i16_to_v26i32_scalar(<52 x i16> inreg %a, i3 ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v191, v1 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v185, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB15_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB15_3 ; GFX11-TRUE16-NEXT: .LBB15_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v191, v191, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v190, v190, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v189, v189, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v24, v24, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v25, v25, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB15_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v191 :: v_dual_mov_b32 v20, v190 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v189 :: v_dual_mov_b32 v22, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v187 :: v_dual_mov_b32 v24, v186 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB15_4: -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v53, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v25, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB15_2 ; ; GFX11-FAKE16-LABEL: bitcast_v52i16_to_v26i32_scalar: @@ -11102,294 +10917,109 @@ define inreg <26 x i32> @bitcast_v52f16_to_v26i32_scalar(<52 x half> inreg %a, i ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v191, v1 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v185, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB19_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB19_3 ; GFX11-TRUE16-NEXT: .LBB19_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v191, 0x200, v191 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v190, 0x200, v190 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v189, 0x200, v189 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v24, 0x200, v24 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v25, 0x200, v25 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB19_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v191 :: v_dual_mov_b32 v20, v190 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v189 :: v_dual_mov_b32 v22, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v187 :: v_dual_mov_b32 v24, v186 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB19_4: -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v53, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v25, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB19_2 ; ; GFX11-FAKE16-LABEL: bitcast_v52f16_to_v26i32_scalar: @@ -17006,294 +16636,109 @@ define inreg <26 x float> @bitcast_v52i16_to_v26f32_scalar(<52 x i16> inreg %a, ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v191, v1 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v185, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB31_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB31_3 ; GFX11-TRUE16-NEXT: .LBB31_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v191, v191, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v190, v190, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v189, v189, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v24, v24, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v25, v25, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB31_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v191 :: v_dual_mov_b32 v20, v190 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v189 :: v_dual_mov_b32 v22, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v187 :: v_dual_mov_b32 v24, v186 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB31_4: -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v53, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v25, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB31_2 ; ; GFX11-FAKE16-LABEL: bitcast_v52i16_to_v26f32_scalar: @@ -21984,294 +21429,109 @@ define inreg <26 x float> @bitcast_v52f16_to_v26f32_scalar(<52 x half> inreg %a, ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v191, v1 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v185, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB35_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB35_3 ; GFX11-TRUE16-NEXT: .LBB35_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v191, 0x200, v191 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v190, 0x200, v190 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v189, 0x200, v189 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v24, 0x200, v24 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v25, 0x200, v25 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB35_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v191 :: v_dual_mov_b32 v20, v190 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v189 :: v_dual_mov_b32 v22, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v187 :: v_dual_mov_b32 v24, v186 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB35_4: -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v53, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v25, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB35_2 ; ; GFX11-FAKE16-LABEL: bitcast_v52f16_to_v26f32_scalar: @@ -26942,294 +26202,109 @@ define inreg <13 x i64> @bitcast_v52i16_to_v13i64_scalar(<52 x i16> inreg %a, i3 ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v191, v1 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v185, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB43_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB43_3 ; GFX11-TRUE16-NEXT: .LBB43_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v191, v191, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v190, v190, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v189, v189, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v24, v24, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v25, v25, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB43_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v191 :: v_dual_mov_b32 v20, v190 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v189 :: v_dual_mov_b32 v22, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v187 :: v_dual_mov_b32 v24, v186 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB43_4: -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v53, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v25, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB43_2 ; ; GFX11-FAKE16-LABEL: bitcast_v52i16_to_v13i64_scalar: @@ -31777,294 +30852,109 @@ define inreg <13 x i64> @bitcast_v52f16_to_v13i64_scalar(<52 x half> inreg %a, i ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v191, v1 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v185, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB47_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB47_3 ; GFX11-TRUE16-NEXT: .LBB47_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v191, 0x200, v191 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v190, 0x200, v190 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v189, 0x200, v189 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v24, 0x200, v24 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v25, 0x200, v25 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB47_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v191 :: v_dual_mov_b32 v20, v190 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v189 :: v_dual_mov_b32 v22, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v187 :: v_dual_mov_b32 v24, v186 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB47_4: -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v53, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v25, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB47_2 ; ; GFX11-FAKE16-LABEL: bitcast_v52f16_to_v13i64_scalar: @@ -35961,294 +34851,109 @@ define inreg <13 x double> @bitcast_v52i16_to_v13f64_scalar(<52 x i16> inreg %a, ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v191, v1 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v185, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB51_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB51_3 ; GFX11-TRUE16-NEXT: .LBB51_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v191, v191, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v190, v190, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v189, v189, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v24, v24, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v25, v25, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB51_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v191 :: v_dual_mov_b32 v20, v190 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v189 :: v_dual_mov_b32 v22, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v187 :: v_dual_mov_b32 v24, v186 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB51_4: -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v53, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v25, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB51_2 ; ; GFX11-FAKE16-LABEL: bitcast_v52i16_to_v13f64_scalar: @@ -40848,294 +39553,109 @@ define inreg <13 x double> @bitcast_v52f16_to_v13f64_scalar(<52 x half> inreg %a ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v186, v6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v187, v5 :: v_dual_mov_b32 v188, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v189, v3 :: v_dual_mov_b32 v190, v2 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v191, v1 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v185, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB55_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB55_3 ; GFX11-TRUE16-NEXT: .LBB55_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v191, 0x200, v191 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v190, 0x200, v190 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v189, 0x200, v189 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v24, 0x200, v24 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v25, 0x200, v25 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB55_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v191 :: v_dual_mov_b32 v20, v190 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v189 :: v_dual_mov_b32 v22, v188 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v187 :: v_dual_mov_b32 v24, v186 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v14, v119 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB55_4: -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v53, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v25, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB55_2 ; ; GFX11-FAKE16-LABEL: bitcast_v52f16_to_v13f64_scalar: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll index 77df03dcdcd9f..2920c8cfd03cb 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll @@ -6779,298 +6779,112 @@ define inreg <28 x i32> @bitcast_v56i16_to_v28i32_scalar(<56 x i16> inreg %a, i3 ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v190, v4 :: v_dual_mov_b32 v185, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v186, v1 :: v_dual_mov_b32 v187, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB15_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB15_3 ; GFX11-TRUE16-NEXT: .LBB15_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v191, v191, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v190, v190, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v189, v189, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v24, v24, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v25, v25, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v26, v26, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v28, v28, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, v27, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB15_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v187 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v186 :: v_dual_mov_b32 v20, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v191 :: v_dual_mov_b32 v22, v190 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v189 :: v_dual_mov_b32 v24, v188 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v119 :: v_dual_mov_b32 v27, v28 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB15_4: -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, v28 :: v_dual_mov_b32 v53, v26 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v54, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v54 :: v_dual_mov_b32 v26, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v28, v64 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB15_2 ; ; GFX11-FAKE16-LABEL: bitcast_v56i16_to_v28i32_scalar: @@ -12041,298 +11855,112 @@ define inreg <28 x i32> @bitcast_v56f16_to_v28i32_scalar(<56 x half> inreg %a, i ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v190, v4 :: v_dual_mov_b32 v185, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v186, v1 :: v_dual_mov_b32 v187, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB19_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB19_3 ; GFX11-TRUE16-NEXT: .LBB19_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v191, 0x200, v191 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v190, 0x200, v190 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v189, 0x200, v189 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v24, 0x200, v24 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v25, 0x200, v25 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v26, 0x200, v26 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v28, 0x200, v28 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, v27 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB19_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v187 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v186 :: v_dual_mov_b32 v20, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v191 :: v_dual_mov_b32 v22, v190 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v189 :: v_dual_mov_b32 v24, v188 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v119 :: v_dual_mov_b32 v27, v28 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB19_4: -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, v28 :: v_dual_mov_b32 v53, v26 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v54, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v54 :: v_dual_mov_b32 v26, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v28, v64 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB19_2 ; ; GFX11-FAKE16-LABEL: bitcast_v56f16_to_v28i32_scalar: @@ -18401,298 +18029,112 @@ define inreg <28 x float> @bitcast_v56i16_to_v28f32_scalar(<56 x i16> inreg %a, ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v190, v4 :: v_dual_mov_b32 v185, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v186, v1 :: v_dual_mov_b32 v187, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB31_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB31_3 ; GFX11-TRUE16-NEXT: .LBB31_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v191, v191, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v190, v190, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v189, v189, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v24, v24, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v25, v25, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v26, v26, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v28, v28, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, v27, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB31_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v187 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v186 :: v_dual_mov_b32 v20, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v191 :: v_dual_mov_b32 v22, v190 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v189 :: v_dual_mov_b32 v24, v188 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v119 :: v_dual_mov_b32 v27, v28 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB31_4: -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, v28 :: v_dual_mov_b32 v53, v26 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v54, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v54 :: v_dual_mov_b32 v26, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v28, v64 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB31_2 ; ; GFX11-FAKE16-LABEL: bitcast_v56i16_to_v28f32_scalar: @@ -23821,298 +23263,112 @@ define inreg <28 x float> @bitcast_v56f16_to_v28f32_scalar(<56 x half> inreg %a, ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v190, v4 :: v_dual_mov_b32 v185, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v186, v1 :: v_dual_mov_b32 v187, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB35_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB35_3 ; GFX11-TRUE16-NEXT: .LBB35_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v191, 0x200, v191 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v190, 0x200, v190 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v189, 0x200, v189 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v24, 0x200, v24 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v25, 0x200, v25 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v26, 0x200, v26 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v28, 0x200, v28 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, v27 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB35_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v187 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v186 :: v_dual_mov_b32 v20, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v191 :: v_dual_mov_b32 v22, v190 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v189 :: v_dual_mov_b32 v24, v188 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v119 :: v_dual_mov_b32 v27, v28 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB35_4: -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, v28 :: v_dual_mov_b32 v53, v26 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v54, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v54 :: v_dual_mov_b32 v26, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v28, v64 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB35_2 ; ; GFX11-FAKE16-LABEL: bitcast_v56f16_to_v28f32_scalar: @@ -29188,298 +28444,112 @@ define inreg <14 x i64> @bitcast_v56i16_to_v14i64_scalar(<56 x i16> inreg %a, i3 ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v190, v4 :: v_dual_mov_b32 v185, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v186, v1 :: v_dual_mov_b32 v187, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB43_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB43_3 ; GFX11-TRUE16-NEXT: .LBB43_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v191, v191, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v190, v190, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v189, v189, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v24, v24, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v25, v25, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v26, v26, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v28, v28, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, v27, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB43_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v187 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v186 :: v_dual_mov_b32 v20, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v191 :: v_dual_mov_b32 v22, v190 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v189 :: v_dual_mov_b32 v24, v188 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v119 :: v_dual_mov_b32 v27, v28 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB43_4: -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, v28 :: v_dual_mov_b32 v53, v26 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v54, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v54 :: v_dual_mov_b32 v26, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v28, v64 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB43_2 ; ; GFX11-FAKE16-LABEL: bitcast_v56i16_to_v14i64_scalar: @@ -34464,298 +33534,112 @@ define inreg <14 x i64> @bitcast_v56f16_to_v14i64_scalar(<56 x half> inreg %a, i ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v190, v4 :: v_dual_mov_b32 v185, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v186, v1 :: v_dual_mov_b32 v187, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB47_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB47_3 ; GFX11-TRUE16-NEXT: .LBB47_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v191, 0x200, v191 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v190, 0x200, v190 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v189, 0x200, v189 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v24, 0x200, v24 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v25, 0x200, v25 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v26, 0x200, v26 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v28, 0x200, v28 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, v27 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB47_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v187 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v186 :: v_dual_mov_b32 v20, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v191 :: v_dual_mov_b32 v22, v190 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v189 :: v_dual_mov_b32 v24, v188 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v119 :: v_dual_mov_b32 v27, v28 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB47_4: -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, v28 :: v_dual_mov_b32 v53, v26 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v54, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v54 :: v_dual_mov_b32 v26, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v28, v64 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB47_2 ; ; GFX11-FAKE16-LABEL: bitcast_v56f16_to_v14i64_scalar: @@ -39003,298 +37887,112 @@ define inreg <14 x double> @bitcast_v56i16_to_v14f64_scalar(<56 x i16> inreg %a, ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v190, v4 :: v_dual_mov_b32 v185, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v186, v1 :: v_dual_mov_b32 v187, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB51_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB51_3 ; GFX11-TRUE16-NEXT: .LBB51_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v191, v191, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v190, v190, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v189, v189, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v24, v24, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v25, v25, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v26, v26, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v28, v28, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, v27, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB51_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v187 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v186 :: v_dual_mov_b32 v20, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v191 :: v_dual_mov_b32 v22, v190 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v189 :: v_dual_mov_b32 v24, v188 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v119 :: v_dual_mov_b32 v27, v28 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB51_4: -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, v28 :: v_dual_mov_b32 v53, v26 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v54, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v54 :: v_dual_mov_b32 v26, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v28, v64 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB51_2 ; ; GFX11-FAKE16-LABEL: bitcast_v56i16_to_v14f64_scalar: @@ -44332,298 +43030,112 @@ define inreg <14 x double> @bitcast_v56f16_to_v14f64_scalar(<56 x half> inreg %a ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v9 :: v_dual_mov_b32 v25, v7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v189, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v6 :: v_dual_mov_b32 v191, v3 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v190, v4 :: v_dual_mov_b32 v185, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v186, v1 :: v_dual_mov_b32 v187, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB55_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB55_3 ; GFX11-TRUE16-NEXT: .LBB55_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v191, 0x200, v191 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v190, 0x200, v190 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v189, 0x200, v189 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v24, 0x200, v24 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v25, 0x200, v25 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v26, 0x200, v26 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v28, 0x200, v28 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, v27 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB55_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v187 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v186 :: v_dual_mov_b32 v20, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v191 :: v_dual_mov_b32 v22, v190 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v189 :: v_dual_mov_b32 v24, v188 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v119 :: v_dual_mov_b32 v27, v28 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB55_4: -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, v28 :: v_dual_mov_b32 v53, v26 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v54, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v54 :: v_dual_mov_b32 v26, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v28, v64 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB55_2 ; ; GFX11-FAKE16-LABEL: bitcast_v56f16_to_v14f64_scalar: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll index c9e5771240078..01602d3d2558d 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll @@ -7235,304 +7235,115 @@ define inreg <30 x i32> @bitcast_v60i16_to_v30i32_scalar(<60 x i16> inreg %a, i3 ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v190, v6 :: v_dual_mov_b32 v185, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v186, v3 :: v_dual_mov_b32 v187, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v1 :: v_dual_mov_b32 v189, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB15_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB15_3 ; GFX11-TRUE16-NEXT: .LBB15_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v189, v189, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v191, v191, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v190, v190, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v24, v24, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v25, v25, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v26, v26, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v30, v30, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, v27, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v28, v28, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v29, v29, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB15_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v189 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v188 :: v_dual_mov_b32 v20, v187 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v186 :: v_dual_mov_b32 v22, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v191 :: v_dual_mov_b32 v24, v190 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v119 :: v_dual_mov_b32 v27, v30 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB15_4: -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, v29 :: v_dual_mov_b32 v65, v28 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v66, v30 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v26 :: v_dual_mov_b32 v54, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v54 :: v_dual_mov_b32 v26, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v65 :: v_dual_mov_b32 v29, v64 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v30, v66 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB15_2 ; ; GFX11-FAKE16-LABEL: bitcast_v60i16_to_v30i32_scalar: @@ -12982,304 +12793,115 @@ define inreg <30 x i32> @bitcast_v60f16_to_v30i32_scalar(<60 x half> inreg %a, i ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v190, v6 :: v_dual_mov_b32 v185, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v186, v3 :: v_dual_mov_b32 v187, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v1 :: v_dual_mov_b32 v189, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB19_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB19_3 ; GFX11-TRUE16-NEXT: .LBB19_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v189, 0x200, v189 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v191, 0x200, v191 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v190, 0x200, v190 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v24, 0x200, v24 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v25, 0x200, v25 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v26, 0x200, v26 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v30, 0x200, v30 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, v27 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v28, 0x200, v28 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v29, 0x200, v29 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB19_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v189 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v188 :: v_dual_mov_b32 v20, v187 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v186 :: v_dual_mov_b32 v22, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v191 :: v_dual_mov_b32 v24, v190 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v119 :: v_dual_mov_b32 v27, v30 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB19_4: -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, v29 :: v_dual_mov_b32 v65, v28 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v66, v30 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v26 :: v_dual_mov_b32 v54, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v54 :: v_dual_mov_b32 v26, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v65 :: v_dual_mov_b32 v29, v64 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v30, v66 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB19_2 ; ; GFX11-FAKE16-LABEL: bitcast_v60f16_to_v30i32_scalar: @@ -19762,304 +19384,115 @@ define inreg <30 x float> @bitcast_v60i16_to_v30f32_scalar(<60 x i16> inreg %a, ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v190, v6 :: v_dual_mov_b32 v185, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v186, v3 :: v_dual_mov_b32 v187, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v1 :: v_dual_mov_b32 v189, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB31_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB31_3 ; GFX11-TRUE16-NEXT: .LBB31_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v189, v189, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v191, v191, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v190, v190, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v24, v24, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v25, v25, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v26, v26, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v30, v30, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, v27, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v28, v28, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v29, v29, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB31_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v189 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v188 :: v_dual_mov_b32 v20, v187 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v186 :: v_dual_mov_b32 v22, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v191 :: v_dual_mov_b32 v24, v190 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v119 :: v_dual_mov_b32 v27, v30 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB31_4: -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, v29 :: v_dual_mov_b32 v65, v28 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v66, v30 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v26 :: v_dual_mov_b32 v54, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v54 :: v_dual_mov_b32 v26, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v65 :: v_dual_mov_b32 v29, v64 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v30, v66 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB31_2 ; ; GFX11-FAKE16-LABEL: bitcast_v60i16_to_v30f32_scalar: @@ -25670,304 +25103,115 @@ define inreg <30 x float> @bitcast_v60f16_to_v30f32_scalar(<60 x half> inreg %a, ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v190, v6 :: v_dual_mov_b32 v185, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v186, v3 :: v_dual_mov_b32 v187, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v1 :: v_dual_mov_b32 v189, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB35_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB35_3 ; GFX11-TRUE16-NEXT: .LBB35_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v189, 0x200, v189 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v191, 0x200, v191 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v190, 0x200, v190 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v24, 0x200, v24 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v25, 0x200, v25 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v26, 0x200, v26 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v30, 0x200, v30 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, v27 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v28, 0x200, v28 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v29, 0x200, v29 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB35_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v189 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v188 :: v_dual_mov_b32 v20, v187 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v186 :: v_dual_mov_b32 v22, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v191 :: v_dual_mov_b32 v24, v190 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v119 :: v_dual_mov_b32 v27, v30 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB35_4: -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, v29 :: v_dual_mov_b32 v65, v28 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v66, v30 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v26 :: v_dual_mov_b32 v54, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v54 :: v_dual_mov_b32 v26, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v65 :: v_dual_mov_b32 v29, v64 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v30, v66 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB35_2 ; ; GFX11-FAKE16-LABEL: bitcast_v60f16_to_v30f32_scalar: @@ -31421,304 +30665,115 @@ define inreg <15 x i64> @bitcast_v60i16_to_v15i64_scalar(<60 x i16> inreg %a, i3 ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v190, v6 :: v_dual_mov_b32 v185, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v186, v3 :: v_dual_mov_b32 v187, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v1 :: v_dual_mov_b32 v189, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB43_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB43_3 ; GFX11-TRUE16-NEXT: .LBB43_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v189, v189, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v191, v191, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v190, v190, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v24, v24, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v25, v25, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v26, v26, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v30, v30, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, v27, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v28, v28, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v29, v29, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB43_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v189 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v188 :: v_dual_mov_b32 v20, v187 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v186 :: v_dual_mov_b32 v22, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v191 :: v_dual_mov_b32 v24, v190 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v119 :: v_dual_mov_b32 v27, v30 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB43_4: -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, v29 :: v_dual_mov_b32 v65, v28 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v66, v30 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v26 :: v_dual_mov_b32 v54, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v54 :: v_dual_mov_b32 v26, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v65 :: v_dual_mov_b32 v29, v64 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v30, v66 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB43_2 ; ; GFX11-FAKE16-LABEL: bitcast_v60i16_to_v15i64_scalar: @@ -37185,304 +36240,115 @@ define inreg <15 x i64> @bitcast_v60f16_to_v15i64_scalar(<60 x half> inreg %a, i ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v190, v6 :: v_dual_mov_b32 v185, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v186, v3 :: v_dual_mov_b32 v187, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v1 :: v_dual_mov_b32 v189, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB47_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB47_3 ; GFX11-TRUE16-NEXT: .LBB47_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v189, 0x200, v189 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v191, 0x200, v191 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v190, 0x200, v190 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v24, 0x200, v24 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v25, 0x200, v25 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v26, 0x200, v26 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v30, 0x200, v30 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, v27 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v28, 0x200, v28 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v29, 0x200, v29 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB47_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v189 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v188 :: v_dual_mov_b32 v20, v187 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v186 :: v_dual_mov_b32 v22, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v191 :: v_dual_mov_b32 v24, v190 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v119 :: v_dual_mov_b32 v27, v30 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB47_4: -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, v29 :: v_dual_mov_b32 v65, v28 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v66, v30 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v26 :: v_dual_mov_b32 v54, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v54 :: v_dual_mov_b32 v26, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v65 :: v_dual_mov_b32 v29, v64 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v30, v66 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB47_2 ; ; GFX11-FAKE16-LABEL: bitcast_v60f16_to_v15i64_scalar: @@ -42058,304 +40924,115 @@ define inreg <15 x double> @bitcast_v60i16_to_v15f64_scalar(<60 x i16> inreg %a, ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v190, v6 :: v_dual_mov_b32 v185, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v186, v3 :: v_dual_mov_b32 v187, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v1 :: v_dual_mov_b32 v189, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB51_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB51_3 ; GFX11-TRUE16-NEXT: .LBB51_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s40, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s41, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v189, v189, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v188, v188, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v187, v187, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v186, v186, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v185, v185, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v191, v191, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v190, v190, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, s0, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v1, s1, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, s2, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, s3, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s4, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s5, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s7, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s10, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s12, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s14, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s15, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s16, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v17, s17, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v24, v24, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v25, v25, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v26, v26, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v30, v30, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, v27, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v28, v28, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v29, v29, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s0, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s1, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v14, s2, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s4, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v35, s5, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v44, s6, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v54, s7, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v65, s8, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v77, s9, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v90, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v104, s11, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v119, s12, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v135, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v152, s14, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v170, s15, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: .LBB51_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v189 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v188 :: v_dual_mov_b32 v20, v187 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v186 :: v_dual_mov_b32 v22, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v191 :: v_dual_mov_b32 v24, v190 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v119 :: v_dual_mov_b32 v27, v30 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB51_4: -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, v29 :: v_dual_mov_b32 v65, v28 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v66, v30 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v26 :: v_dual_mov_b32 v54, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v54 :: v_dual_mov_b32 v26, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v65 :: v_dual_mov_b32 v29, v64 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v30, v66 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB51_2 ; ; GFX11-FAKE16-LABEL: bitcast_v60i16_to_v15f64_scalar: @@ -47866,304 +46543,115 @@ define inreg <15 x double> @bitcast_v60f16_to_v15f64_scalar(<60 x half> inreg %a ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v40, s32 offset:316 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v41, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v42, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v43, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v44, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v45, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v46, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v47, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v56, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v57, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v58, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v59, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v60, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v61, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v62, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v63, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v72, s32 offset:252 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v73, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v74, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v75, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v76, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v77, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v78, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v79, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v88, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v89, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v90, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v91, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v92, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v93, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v94, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v95, s32 offset:192 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v104, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v105, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v106, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v107, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v108, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v109, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v110, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v111, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v120, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v121, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v122, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v123, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v124, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v125, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v126, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v127, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v136, s32 offset:124 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v137, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v138, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v139, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v140, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v141, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v142, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v143, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v152, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v153, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v154, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v155, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v156, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v157, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v158, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v159, s32 offset:64 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Spill -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v168, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v169, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v170, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v171, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v172, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v173, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v174, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v175, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v184, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v185, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v186, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v187, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v188, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v189, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v190, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_store_b32 off, v191, s32 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v29, v11 :: v_dual_mov_b32 v28, v10 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, v9 :: v_dual_mov_b32 v25, v7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v26, v8 :: v_dual_mov_b32 v191, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v190, v6 :: v_dual_mov_b32 v185, v4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v186, v3 :: v_dual_mov_b32 v187, v2 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v188, v1 :: v_dual_mov_b32 v189, v0 -; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s29, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s27, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s25, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s24, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s22, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s20, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s19, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s2, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 -; GFX11-TRUE16-NEXT: s_mov_b32 s42, 0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s40, s0, s40 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s41, s1, s41 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s2, s46 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s3, s45 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s16, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s17, s43 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s18, s4 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s19, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s20, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s21, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s22, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s23, s9 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s24, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s25, s11 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s26, s12 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s27, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s28, s14 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s29, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v27, v9 :: v_dual_mov_b32 v26, v8 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v7 :: v_dual_mov_b32 v24, v6 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v5 :: v_dual_mov_b32 v22, v4 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v20, v2 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v18, v0 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s26, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s24, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s23, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s21, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s19, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s18, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s1, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s46, s0, 16 +; GFX11-TRUE16-NEXT: s_mov_b32 s40, 0 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s46 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s5, s17, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s18, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s19, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s20, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s21, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s22, s10 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s23, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s24, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s25, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s26, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s27, s15 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s16, s28, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s17, s29, s41 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB55_4 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %cmp.false -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v5, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s41 :: v_dual_mov_b32 v9, s1 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v27, s4 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v20, s3 :: v_dual_mov_b32 v35, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v44, s6 :: v_dual_mov_b32 v65, s8 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s7 :: v_dual_mov_b32 v77, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v90, s10 :: v_dual_mov_b32 v119, s12 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v104, s11 :: v_dual_mov_b32 v135, s13 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v152, s14 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v170, s15 -; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s42 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 +; GFX11-TRUE16-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s40 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB55_3 ; GFX11-TRUE16-NEXT: .LBB55_2: ; %cmp.true -; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s40 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s41 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v189, 0x200, v189 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v188, 0x200, v188 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v187, 0x200, v187 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v186, 0x200, v186 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v185, 0x200, v185 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v191, 0x200, v191 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v190, 0x200, v190 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, 0x200, s0 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, 0x200, s1 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, s2 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, s3 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s4 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s5 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s7 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s10 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s12 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s14 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s15 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s16 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v17, 0x200, s17 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v19, 0x200, v19 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, v20 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, v21 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v22, 0x200, v22 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v23, 0x200, v23 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v24, 0x200, v24 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v25, 0x200, v25 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v26, 0x200, v26 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v30, 0x200, v30 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, v27 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v28, 0x200, v28 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v29, 0x200, v29 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s0 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s1 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v14, 0x200, s2 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s4 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v35, 0x200, s5 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v44, 0x200, s6 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v54, 0x200, s7 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v65, 0x200, s8 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v77, 0x200, s9 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v90, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v104, 0x200, s11 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v119, 0x200, s12 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v135, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v152, 0x200, s14 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v170, 0x200, s15 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: .LBB55_3: ; %end -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v5, v20 :: v_dual_mov_b32 v6, v27 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v7, v35 :: v_dual_mov_b32 v8, v44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v11, v77 :: v_dual_mov_b32 v12, v90 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v104 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v15, v135 :: v_dual_mov_b32 v16, v152 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v17, v170 :: v_dual_mov_b32 v18, v189 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v19, v188 :: v_dual_mov_b32 v20, v187 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v21, v186 :: v_dual_mov_b32 v22, v185 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v23, v191 :: v_dual_mov_b32 v24, v190 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v191, off, s32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v190, off, s32 offset:4 -; GFX11-TRUE16-NEXT: scratch_load_b32 v189, off, s32 offset:8 -; GFX11-TRUE16-NEXT: scratch_load_b32 v188, off, s32 offset:12 -; GFX11-TRUE16-NEXT: scratch_load_b32 v187, off, s32 offset:16 -; GFX11-TRUE16-NEXT: scratch_load_b32 v186, off, s32 offset:20 -; GFX11-TRUE16-NEXT: scratch_load_b32 v185, off, s32 offset:24 -; GFX11-TRUE16-NEXT: scratch_load_b32 v184, off, s32 offset:28 -; GFX11-TRUE16-NEXT: scratch_load_b32 v175, off, s32 offset:32 -; GFX11-TRUE16-NEXT: scratch_load_b32 v174, off, s32 offset:36 -; GFX11-TRUE16-NEXT: scratch_load_b32 v173, off, s32 offset:40 -; GFX11-TRUE16-NEXT: scratch_load_b32 v172, off, s32 offset:44 -; GFX11-TRUE16-NEXT: scratch_load_b32 v171, off, s32 offset:48 -; GFX11-TRUE16-NEXT: scratch_load_b32 v170, off, s32 offset:52 -; GFX11-TRUE16-NEXT: scratch_load_b32 v169, off, s32 offset:56 -; GFX11-TRUE16-NEXT: scratch_load_b32 v168, off, s32 offset:60 -; GFX11-TRUE16-NEXT: scratch_load_b32 v159, off, s32 offset:64 -; GFX11-TRUE16-NEXT: scratch_load_b32 v158, off, s32 offset:68 -; GFX11-TRUE16-NEXT: scratch_load_b32 v157, off, s32 offset:72 -; GFX11-TRUE16-NEXT: scratch_load_b32 v156, off, s32 offset:76 -; GFX11-TRUE16-NEXT: scratch_load_b32 v155, off, s32 offset:80 -; GFX11-TRUE16-NEXT: scratch_load_b32 v154, off, s32 offset:84 -; GFX11-TRUE16-NEXT: scratch_load_b32 v153, off, s32 offset:88 -; GFX11-TRUE16-NEXT: scratch_load_b32 v152, off, s32 offset:92 -; GFX11-TRUE16-NEXT: scratch_load_b32 v143, off, s32 offset:96 -; GFX11-TRUE16-NEXT: scratch_load_b32 v142, off, s32 offset:100 -; GFX11-TRUE16-NEXT: scratch_load_b32 v141, off, s32 offset:104 -; GFX11-TRUE16-NEXT: scratch_load_b32 v140, off, s32 offset:108 -; GFX11-TRUE16-NEXT: scratch_load_b32 v139, off, s32 offset:112 -; GFX11-TRUE16-NEXT: scratch_load_b32 v138, off, s32 offset:116 -; GFX11-TRUE16-NEXT: scratch_load_b32 v137, off, s32 offset:120 -; GFX11-TRUE16-NEXT: scratch_load_b32 v136, off, s32 offset:124 -; GFX11-TRUE16-NEXT: s_clause 0x1f ; 128-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v127, off, s32 offset:128 -; GFX11-TRUE16-NEXT: scratch_load_b32 v126, off, s32 offset:132 -; GFX11-TRUE16-NEXT: scratch_load_b32 v125, off, s32 offset:136 -; GFX11-TRUE16-NEXT: scratch_load_b32 v124, off, s32 offset:140 -; GFX11-TRUE16-NEXT: scratch_load_b32 v123, off, s32 offset:144 -; GFX11-TRUE16-NEXT: scratch_load_b32 v122, off, s32 offset:148 -; GFX11-TRUE16-NEXT: scratch_load_b32 v121, off, s32 offset:152 -; GFX11-TRUE16-NEXT: scratch_load_b32 v120, off, s32 offset:156 -; GFX11-TRUE16-NEXT: scratch_load_b32 v111, off, s32 offset:160 -; GFX11-TRUE16-NEXT: scratch_load_b32 v110, off, s32 offset:164 -; GFX11-TRUE16-NEXT: scratch_load_b32 v109, off, s32 offset:168 -; GFX11-TRUE16-NEXT: scratch_load_b32 v108, off, s32 offset:172 -; GFX11-TRUE16-NEXT: scratch_load_b32 v107, off, s32 offset:176 -; GFX11-TRUE16-NEXT: scratch_load_b32 v106, off, s32 offset:180 -; GFX11-TRUE16-NEXT: scratch_load_b32 v105, off, s32 offset:184 -; GFX11-TRUE16-NEXT: scratch_load_b32 v104, off, s32 offset:188 -; GFX11-TRUE16-NEXT: scratch_load_b32 v95, off, s32 offset:192 -; GFX11-TRUE16-NEXT: scratch_load_b32 v94, off, s32 offset:196 -; GFX11-TRUE16-NEXT: scratch_load_b32 v93, off, s32 offset:200 -; GFX11-TRUE16-NEXT: scratch_load_b32 v92, off, s32 offset:204 -; GFX11-TRUE16-NEXT: scratch_load_b32 v91, off, s32 offset:208 -; GFX11-TRUE16-NEXT: scratch_load_b32 v90, off, s32 offset:212 -; GFX11-TRUE16-NEXT: scratch_load_b32 v89, off, s32 offset:216 -; GFX11-TRUE16-NEXT: scratch_load_b32 v88, off, s32 offset:220 -; GFX11-TRUE16-NEXT: scratch_load_b32 v79, off, s32 offset:224 -; GFX11-TRUE16-NEXT: scratch_load_b32 v78, off, s32 offset:228 -; GFX11-TRUE16-NEXT: scratch_load_b32 v77, off, s32 offset:232 -; GFX11-TRUE16-NEXT: scratch_load_b32 v76, off, s32 offset:236 -; GFX11-TRUE16-NEXT: scratch_load_b32 v75, off, s32 offset:240 -; GFX11-TRUE16-NEXT: scratch_load_b32 v74, off, s32 offset:244 -; GFX11-TRUE16-NEXT: scratch_load_b32 v73, off, s32 offset:248 -; GFX11-TRUE16-NEXT: scratch_load_b32 v72, off, s32 offset:252 -; GFX11-TRUE16-NEXT: s_clause 0xf ; 64-byte Folded Reload -; GFX11-TRUE16-NEXT: scratch_load_b32 v63, off, s32 offset:256 -; GFX11-TRUE16-NEXT: scratch_load_b32 v62, off, s32 offset:260 -; GFX11-TRUE16-NEXT: scratch_load_b32 v61, off, s32 offset:264 -; GFX11-TRUE16-NEXT: scratch_load_b32 v60, off, s32 offset:268 -; GFX11-TRUE16-NEXT: scratch_load_b32 v59, off, s32 offset:272 -; GFX11-TRUE16-NEXT: scratch_load_b32 v58, off, s32 offset:276 -; GFX11-TRUE16-NEXT: scratch_load_b32 v57, off, s32 offset:280 -; GFX11-TRUE16-NEXT: scratch_load_b32 v56, off, s32 offset:284 -; GFX11-TRUE16-NEXT: scratch_load_b32 v47, off, s32 offset:288 -; GFX11-TRUE16-NEXT: scratch_load_b32 v46, off, s32 offset:292 -; GFX11-TRUE16-NEXT: scratch_load_b32 v45, off, s32 offset:296 -; GFX11-TRUE16-NEXT: scratch_load_b32 v44, off, s32 offset:300 -; GFX11-TRUE16-NEXT: scratch_load_b32 v43, off, s32 offset:304 -; GFX11-TRUE16-NEXT: scratch_load_b32 v42, off, s32 offset:308 -; GFX11-TRUE16-NEXT: scratch_load_b32 v41, off, s32 offset:312 -; GFX11-TRUE16-NEXT: scratch_load_b32 v40, off, s32 offset:316 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v3, v9 :: v_dual_mov_b32 v4, v14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v54 :: v_dual_mov_b32 v10, v65 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v14, v119 :: v_dual_mov_b32 v27, v30 -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-TRUE16-NEXT: .LBB55_4: -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, v29 :: v_dual_mov_b32 v65, v28 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v66, v30 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v26 :: v_dual_mov_b32 v54, v25 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v25, v54 :: v_dual_mov_b32 v26, v53 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, v65 :: v_dual_mov_b32 v29, v64 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v30, v66 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143_vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167 -; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159_vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175_vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr1 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr3 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr5 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr6 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr9 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr10 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr12 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr13 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr14 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr15 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr16 +; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr17 ; GFX11-TRUE16-NEXT: s_branch .LBB55_2 ; ; GFX11-FAKE16-LABEL: bitcast_v60f16_to_v15f64_scalar: diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll index 2b63a8cf69476..28b992ee77b14 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -981,7 +981,7 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0 ; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 ; GCN-NEXT: s_mov_b64 s[8:9], 0 -; GCN-NEXT: v_mov_b32_e32 v7, 0 +; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: s_mov_b64 s[6:7], 0 ; GCN-NEXT: s_branch .LBB5_3 ; GCN-NEXT: .LBB5_1: ; %Flow @@ -1004,36 +1004,36 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-NEXT: ; %bb.4: ; %bb2 ; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1 ; GCN-NEXT: s_or_b64 exec, exec, s[6:7] -; GCN-NEXT: v_mov_b32_e32 v8, v7 -; GCN-NEXT: v_mov_b32_e32 v2, v7 -; GCN-NEXT: v_mov_b32_e32 v6, v7 +; GCN-NEXT: v_mov_b32_e32 v1, v0 +; GCN-NEXT: v_mov_b32_e32 v2, v0 +; GCN-NEXT: v_mov_b32_e32 v3, v0 ; GCN-NEXT: s_and_saveexec_b64 s[10:11], s[4:5] ; GCN-NEXT: s_cbranch_execz .LBB5_2 ; GCN-NEXT: ; %bb.5: ; %bb4 ; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1 -; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v8, v7 -; GCN-NEXT: v_mov_b32_e32 v2, v7 -; GCN-NEXT: v_mov_b32_e32 v6, v7 +; GCN-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen +; GCN-NEXT: v_mov_b32_e32 v1, v0 +; GCN-NEXT: v_mov_b32_e32 v2, v0 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_cmp_gt_f32_e64 s[6:7], 0, v0 +; GCN-NEXT: v_cmp_gt_f32_e64 s[6:7], 0, v3 +; GCN-NEXT: v_mov_b32_e32 v3, v0 ; GCN-NEXT: s_and_saveexec_b64 s[12:13], s[6:7] ; GCN-NEXT: s_cbranch_execz .LBB5_1 ; GCN-NEXT: ; %bb.6: ; %bb8 ; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1 -; GCN-NEXT: v_mov_b32_e32 v8, v7 -; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GCN-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5_vgpr6 +; GCN-NEXT: v_mov_b32_e32 v1, v0 +; GCN-NEXT: ; implicit-def: $vgpr2 +; GCN-NEXT: ; implicit-def: $vgpr3 ; GCN-NEXT: s_branch .LBB5_1 ; GCN-NEXT: .LBB5_7: ; %bb12 ; GCN-NEXT: s_or_b64 exec, exec, s[8:9] -; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir b/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir index eaf669da83ead..9e38919190ea7 100644 --- a/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir +++ b/llvm/test/CodeGen/AMDGPU/live-interval-bug-in-rename-independent-subregs.mir @@ -11,26 +11,28 @@ body: | ; REG_ALLOC-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11 ; REG_ALLOC-NEXT: {{ $}} - ; REG_ALLOC-NEXT: renamable $vgpr5_vgpr6_vgpr7_vgpr8 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr3, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; REG_ALLOC-NEXT: renamable $vgpr15_vgpr16_vgpr17_vgpr18 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr2, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; REG_ALLOC-NEXT: renamable $vgpr11_vgpr12_vgpr13_vgpr14 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr0, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; REG_ALLOC-NEXT: renamable $vgpr11_vgpr12_vgpr13_vgpr14 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr3, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; REG_ALLOC-NEXT: renamable $vgpr12_vgpr13_vgpr14_vgpr15 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr2, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; REG_ALLOC-NEXT: renamable $vgpr5_vgpr6_vgpr7_vgpr8 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr0, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; REG_ALLOC-NEXT: renamable $vgpr6_vgpr7_vgpr8_vgpr9 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr4, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; REG_ALLOC-NEXT: KILL killed renamable $vgpr4 ; REG_ALLOC-NEXT: KILL killed renamable $vgpr2 ; REG_ALLOC-NEXT: KILL killed renamable $vgpr0 ; REG_ALLOC-NEXT: KILL killed renamable $vgpr3 - ; REG_ALLOC-NEXT: renamable $sgpr12 = V_READFIRSTLANE_B32 killed $vgpr5, implicit $exec - ; REG_ALLOC-NEXT: renamable $vgpr4_vgpr5_vgpr6_vgpr7 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr4, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; REG_ALLOC-NEXT: renamable $sgpr13 = V_READFIRSTLANE_B32 killed $vgpr15, implicit $exec + ; REG_ALLOC-NEXT: KILL killed renamable $sgpr8_sgpr9_sgpr10_sgpr11 + ; REG_ALLOC-NEXT: renamable $sgpr8 = V_READFIRSTLANE_B32 killed $vgpr11, implicit $exec + ; REG_ALLOC-NEXT: renamable $sgpr9 = V_READFIRSTLANE_B32 killed $vgpr12, implicit $exec ; REG_ALLOC-NEXT: renamable $sgpr6_sgpr7 = V_CMP_NE_U32_e64 killed $vgpr1, 0, implicit $exec - ; REG_ALLOC-NEXT: S_CMP_EQ_U64 killed renamable $sgpr12_sgpr13, killed renamable $sgpr2_sgpr3, implicit-def $scc + ; REG_ALLOC-NEXT: S_CMP_EQ_U64 killed renamable $sgpr8_sgpr9, killed renamable $sgpr2_sgpr3, implicit-def $scc ; REG_ALLOC-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc - ; REG_ALLOC-NEXT: renamable $vgpr8 = IMPLICIT_DEF + ; REG_ALLOC-NEXT: renamable $vgpr0 = IMPLICIT_DEF ; REG_ALLOC-NEXT: $exec = S_MOV_B64_term renamable $sgpr6_sgpr7 ; REG_ALLOC-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec ; REG_ALLOC-NEXT: S_BRANCH %bb.2 ; REG_ALLOC-NEXT: {{ $}} ; REG_ALLOC-NEXT: bb.1: ; REG_ALLOC-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) - ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300 + ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr5_vgpr6_vgpr7_vgpr8:0x0000000000000003, $vgpr6_vgpr7_vgpr8_vgpr9:0x0000000000000003 ; REG_ALLOC-NEXT: {{ $}} ; REG_ALLOC-NEXT: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec ; REG_ALLOC-NEXT: $exec = S_XOR_B64_term $exec, renamable $sgpr2_sgpr3, implicit-def $scc @@ -42,33 +44,33 @@ body: | ; REG_ALLOC-NEXT: liveins: $sgpr0, $sgpr1, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7 ; REG_ALLOC-NEXT: {{ $}} ; REG_ALLOC-NEXT: renamable $sgpr1 = S_OR_B32 killed renamable $sgpr1, 2, implicit-def dead $scc - ; REG_ALLOC-NEXT: renamable $vgpr8 = COPY killed renamable $sgpr1 - ; REG_ALLOC-NEXT: renamable $vgpr11_vgpr12 = IMPLICIT_DEF - ; REG_ALLOC-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; REG_ALLOC-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr1 + ; REG_ALLOC-NEXT: renamable $vgpr5_vgpr6 = IMPLICIT_DEF + ; REG_ALLOC-NEXT: renamable $vgpr6_vgpr7_vgpr8_vgpr9 = IMPLICIT_DEF ; REG_ALLOC-NEXT: S_BRANCH %bb.1 ; REG_ALLOC-NEXT: {{ $}} ; REG_ALLOC-NEXT: bb.3: ; REG_ALLOC-NEXT: successors: %bb.5(0x80000000) - ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300 + ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5, $vgpr5_vgpr6_vgpr7_vgpr8:0x0000000000000003, $vgpr6_vgpr7_vgpr8_vgpr9:0x0000000000000003 ; REG_ALLOC-NEXT: {{ $}} - ; REG_ALLOC-NEXT: renamable $sgpr1 = V_READFIRSTLANE_B32 killed $vgpr11, implicit $exec - ; REG_ALLOC-NEXT: renamable $sgpr6 = V_READFIRSTLANE_B32 killed $vgpr4, implicit $exec + ; REG_ALLOC-NEXT: renamable $sgpr1 = V_READFIRSTLANE_B32 killed $vgpr5, implicit $exec + ; REG_ALLOC-NEXT: renamable $sgpr6 = V_READFIRSTLANE_B32 killed $vgpr6, implicit $exec ; REG_ALLOC-NEXT: S_CMP_EQ_U32 killed renamable $sgpr6, killed renamable $sgpr1, implicit-def $scc ; REG_ALLOC-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc - ; REG_ALLOC-NEXT: renamable $vgpr8 = COPY killed renamable $sgpr1 + ; REG_ALLOC-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr1 ; REG_ALLOC-NEXT: S_BRANCH %bb.5 ; REG_ALLOC-NEXT: {{ $}} ; REG_ALLOC-NEXT: bb.4: - ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr4_sgpr5 + ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr4_sgpr5 ; REG_ALLOC-NEXT: {{ $}} ; REG_ALLOC-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (<4 x s32>), addrspace 4) - ; REG_ALLOC-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $sgpr0, killed $vgpr10, 0, implicit $exec - ; REG_ALLOC-NEXT: BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr8, killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; REG_ALLOC-NEXT: renamable $vgpr1 = V_ADD_U32_e64 killed $sgpr0, killed $vgpr10, 0, implicit $exec + ; REG_ALLOC-NEXT: BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) ; REG_ALLOC-NEXT: S_ENDPGM 0 ; REG_ALLOC-NEXT: {{ $}} ; REG_ALLOC-NEXT: bb.5: ; REG_ALLOC-NEXT: successors: %bb.4(0x80000000) - ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5 + ; REG_ALLOC-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5 ; REG_ALLOC-NEXT: {{ $}} ; REG_ALLOC-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc ; REG_ALLOC-NEXT: S_BRANCH %bb.4 @@ -78,26 +80,28 @@ body: | ; DEAD_INST_DEL-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11 ; DEAD_INST_DEL-NEXT: {{ $}} - ; DEAD_INST_DEL-NEXT: renamable $vgpr5_vgpr6_vgpr7_vgpr8 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr3, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; DEAD_INST_DEL-NEXT: renamable $vgpr15_vgpr16_vgpr17_vgpr18 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr2, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; DEAD_INST_DEL-NEXT: renamable $vgpr11_vgpr12_vgpr13_vgpr14 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr0, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; DEAD_INST_DEL-NEXT: renamable $vgpr11_vgpr12_vgpr13_vgpr14 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr3, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; DEAD_INST_DEL-NEXT: renamable $vgpr12_vgpr13_vgpr14_vgpr15 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr2, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; DEAD_INST_DEL-NEXT: renamable $vgpr5_vgpr6_vgpr7_vgpr8 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr0, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; DEAD_INST_DEL-NEXT: renamable $vgpr6_vgpr7_vgpr8_vgpr9 = BUFFER_LOAD_DWORDX4_OFFEN renamable $vgpr4, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; DEAD_INST_DEL-NEXT: KILL killed renamable $vgpr4 ; DEAD_INST_DEL-NEXT: KILL killed renamable $vgpr2 ; DEAD_INST_DEL-NEXT: KILL killed renamable $vgpr0 ; DEAD_INST_DEL-NEXT: KILL killed renamable $vgpr3 - ; DEAD_INST_DEL-NEXT: renamable $sgpr12 = V_READFIRSTLANE_B32 killed $vgpr5, implicit $exec - ; DEAD_INST_DEL-NEXT: renamable $vgpr4_vgpr5_vgpr6_vgpr7 = BUFFER_LOAD_DWORDX4_OFFEN killed renamable $vgpr4, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) - ; DEAD_INST_DEL-NEXT: renamable $sgpr13 = V_READFIRSTLANE_B32 killed $vgpr15, implicit $exec + ; DEAD_INST_DEL-NEXT: KILL killed renamable $sgpr8_sgpr9_sgpr10_sgpr11 + ; DEAD_INST_DEL-NEXT: renamable $sgpr8 = V_READFIRSTLANE_B32 killed $vgpr11, implicit $exec + ; DEAD_INST_DEL-NEXT: renamable $sgpr9 = V_READFIRSTLANE_B32 killed $vgpr12, implicit $exec ; DEAD_INST_DEL-NEXT: renamable $sgpr6_sgpr7 = V_CMP_NE_U32_e64 killed $vgpr1, 0, implicit $exec - ; DEAD_INST_DEL-NEXT: S_CMP_EQ_U64 killed renamable $sgpr12_sgpr13, killed renamable $sgpr2_sgpr3, implicit-def $scc + ; DEAD_INST_DEL-NEXT: S_CMP_EQ_U64 killed renamable $sgpr8_sgpr9, killed renamable $sgpr2_sgpr3, implicit-def $scc ; DEAD_INST_DEL-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc - ; DEAD_INST_DEL-NEXT: renamable $vgpr8 = IMPLICIT_DEF + ; DEAD_INST_DEL-NEXT: renamable $vgpr0 = IMPLICIT_DEF ; DEAD_INST_DEL-NEXT: $exec = S_MOV_B64_term renamable $sgpr6_sgpr7 ; DEAD_INST_DEL-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec ; DEAD_INST_DEL-NEXT: S_BRANCH %bb.2 ; DEAD_INST_DEL-NEXT: {{ $}} ; DEAD_INST_DEL-NEXT: bb.1: ; DEAD_INST_DEL-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) - ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300 + ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr5_vgpr6_vgpr7_vgpr8:0x0000000000000003, $vgpr6_vgpr7_vgpr8_vgpr9:0x0000000000000003 ; DEAD_INST_DEL-NEXT: {{ $}} ; DEAD_INST_DEL-NEXT: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec ; DEAD_INST_DEL-NEXT: $exec = S_XOR_B64_term $exec, renamable $sgpr2_sgpr3, implicit-def $scc @@ -109,33 +113,33 @@ body: | ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $sgpr1, $vgpr10, $sgpr4_sgpr5, $sgpr6_sgpr7 ; DEAD_INST_DEL-NEXT: {{ $}} ; DEAD_INST_DEL-NEXT: renamable $sgpr1 = S_OR_B32 killed renamable $sgpr1, 2, implicit-def dead $scc - ; DEAD_INST_DEL-NEXT: renamable $vgpr8 = COPY killed renamable $sgpr1 - ; DEAD_INST_DEL-NEXT: renamable $vgpr11_vgpr12 = IMPLICIT_DEF - ; DEAD_INST_DEL-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF + ; DEAD_INST_DEL-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr1 + ; DEAD_INST_DEL-NEXT: renamable $vgpr5_vgpr6 = IMPLICIT_DEF + ; DEAD_INST_DEL-NEXT: renamable $vgpr6_vgpr7_vgpr8_vgpr9 = IMPLICIT_DEF ; DEAD_INST_DEL-NEXT: S_BRANCH %bb.1 ; DEAD_INST_DEL-NEXT: {{ $}} ; DEAD_INST_DEL-NEXT: bb.3: ; DEAD_INST_DEL-NEXT: successors: %bb.5(0x80000000) - ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5, $vgpr11_vgpr12_vgpr13_vgpr14:0x0000000000000003, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7:0x0000000000000300 + ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5, $vgpr5_vgpr6_vgpr7_vgpr8:0x0000000000000003, $vgpr6_vgpr7_vgpr8_vgpr9:0x0000000000000003 ; DEAD_INST_DEL-NEXT: {{ $}} - ; DEAD_INST_DEL-NEXT: renamable $sgpr1 = V_READFIRSTLANE_B32 killed $vgpr11, implicit $exec - ; DEAD_INST_DEL-NEXT: renamable $sgpr6 = V_READFIRSTLANE_B32 killed $vgpr4, implicit $exec + ; DEAD_INST_DEL-NEXT: renamable $sgpr1 = V_READFIRSTLANE_B32 killed $vgpr5, implicit $exec + ; DEAD_INST_DEL-NEXT: renamable $sgpr6 = V_READFIRSTLANE_B32 killed $vgpr6, implicit $exec ; DEAD_INST_DEL-NEXT: S_CMP_EQ_U32 killed renamable $sgpr6, killed renamable $sgpr1, implicit-def $scc ; DEAD_INST_DEL-NEXT: renamable $sgpr1 = S_CSELECT_B32 1, 0, implicit $scc - ; DEAD_INST_DEL-NEXT: renamable $vgpr8 = COPY killed renamable $sgpr1 + ; DEAD_INST_DEL-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr1 ; DEAD_INST_DEL-NEXT: S_BRANCH %bb.5 ; DEAD_INST_DEL-NEXT: {{ $}} ; DEAD_INST_DEL-NEXT: bb.4: - ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr4_sgpr5 + ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr4_sgpr5 ; DEAD_INST_DEL-NEXT: {{ $}} ; DEAD_INST_DEL-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (<4 x s32>), addrspace 4) - ; DEAD_INST_DEL-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $sgpr0, killed $vgpr10, 0, implicit $exec - ; DEAD_INST_DEL-NEXT: BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr8, killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) + ; DEAD_INST_DEL-NEXT: renamable $vgpr1 = V_ADD_U32_e64 killed $sgpr0, killed $vgpr10, 0, implicit $exec + ; DEAD_INST_DEL-NEXT: BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) ; DEAD_INST_DEL-NEXT: S_ENDPGM 0 ; DEAD_INST_DEL-NEXT: {{ $}} ; DEAD_INST_DEL-NEXT: bb.5: ; DEAD_INST_DEL-NEXT: successors: %bb.4(0x80000000) - ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr8, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5 + ; DEAD_INST_DEL-NEXT: liveins: $sgpr0, $vgpr0, $vgpr10, $sgpr2_sgpr3, $sgpr4_sgpr5 ; DEAD_INST_DEL-NEXT: {{ $}} ; DEAD_INST_DEL-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc ; DEAD_INST_DEL-NEXT: S_BRANCH %bb.4 diff --git a/llvm/test/CodeGen/AMDGPU/use-after-free-after-cleanup-failed-vreg.ll b/llvm/test/CodeGen/AMDGPU/use-after-free-after-cleanup-failed-vreg.ll index ea127323f3e05..50efc06237d5b 100644 --- a/llvm/test/CodeGen/AMDGPU/use-after-free-after-cleanup-failed-vreg.ll +++ b/llvm/test/CodeGen/AMDGPU/use-after-free-after-cleanup-failed-vreg.ll @@ -1,4 +1,4 @@ -; RUN: not llc -mcpu=gfx1100 -mtriple=amdgcn-amd-amdhsa -stress-regalloc=4 -filetype=null -verify-machineinstrs %s 2>&1 | FileCheck %s +; RUN: not llc -mcpu=gfx1100 -mtriple=amdgcn-amd-amdhsa -stress-regalloc=4 -amdgpu-enable-rewrite-partial-reg-uses=0 -filetype=null -verify-machineinstrs %s 2>&1 | FileCheck %s ; CHECK: error: :0:0: ran out of registers during register allocation in function 'f' ; CHECK-NOT: Bad machine code