diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 044ea866342c2..10283a2206732 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -7527,6 +7527,11 @@ void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI, unsigned OpIdx, return; unsigned Opcode = MI.getOpcode(); + if (Opcode == AMDGPU::REG_SEQUENCE) { + legalizeSpecialInst_t16(MI, MRI); + return; + } + MachineBasicBlock *MBB = MI.getParent(); // Legalize operands and check for size mismatch if (!OpIdx || OpIdx >= MI.getNumExplicitOperands() || @@ -7565,6 +7570,65 @@ void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI, legalizeOperandsVALUt16(MI, OpIdx, MRI); } +// Legalize special instructions whose operands mismatch in size (16-bit vs +// 32-bit) during moveToVALU lowering in true16 mode. This is caused by isel +// placing 16-bit values in both vgpr16 and sreg32. The cases are: +// Copy +// 1. dst32 = copy vgpr16 => dst32 = REG_SEQUENCE(vgpr16, lo16) +// 2. dst32 = copy .lo16:vgpr32 / dst32 = copy .hi16:vgpr32 +// => dst32 = REG_SEQUENCE(.lo16/hi16:vgpr32, lo16) +// 3. sgpr16 = copy vgpr32/... (skipped, isel does not generate sgpr16) +// +// Reg_sequence +// dst32 = reg_sequence(vgpr32, lo16/hi16) +// => dst32 = reg_sequence(.lo16:vgpr32, lo16/hi16) +// +// This can be removed after we have sgpr16 in place. 
+void SIInstrInfo::legalizeSpecialInst_t16(MachineInstr &Inst,
+                                          MachineRegisterInfo &MRI) const { // Rewrites Inst in place; only COPY and REG_SEQUENCE are handled, other opcodes fall out of the switch untouched.
+  unsigned Opcode = Inst.getOpcode();
+  const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst); // Only read by the COPY case below.
+  switch (Opcode) {
+  case AMDGPU::COPY: {
+    Register SrcReg = Inst.getOperand(1).getReg();
+    if (!SrcReg.isVirtual() || !RI.isVGPR(MRI, SrcReg)) // Only virtual VGPR sources are rewritten.
+      return;
+
+    bool SetSubReg = false;
+    Register SrcSubReg = Inst.getOperand(1).getSubReg(); // Sub-register index carried by the source operand. NOTE(review): this is an index, not a register; `unsigned` may be the better type - confirm.
+    const TargetRegisterClass *SrcRegRC = getOpRegClass(Inst, 1);
+    if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) { // Match found: branch is empty on purpose, control continues to the REG_SEQUENCE rewrite below.
+    } else if (NewDstRC == &AMDGPU::VGPR_32RegClass &&
+               (SrcSubReg == AMDGPU::hi16 || SrcSubReg == AMDGPU::lo16)) { // 32-bit destination fed by a .lo16/.hi16 sub-register use.
+      SetSubReg = true;
+    } else // Neither shape applies: leave the COPY as it is.
+      return;
+
+    Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
+    BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+            get(AMDGPU::IMPLICIT_DEF), Undef); // IMPLICIT_DEF-defined VGPR_16 register; paired with hi16 below.
+    Inst.setDesc(get(AMDGPU::REG_SEQUENCE)); // The COPY itself is turned into the REG_SEQUENCE.
+    if (SetSubReg)
+      Inst.getOperand(1).setSubReg(SrcSubReg); // NOTE(review): SrcSubReg was read from this same operand, so this looks like a no-op - confirm.
+
+    Inst.addOperand(MachineOperand::CreateImm(AMDGPU::lo16)); // Existing source operand is paired with lo16.
+    Inst.addOperand(MachineOperand::CreateReg(Undef, 0));
+    Inst.addOperand(MachineOperand::CreateImm(AMDGPU::hi16)); // Undef is paired with hi16.
+  } break;
+  case AMDGPU::REG_SEQUENCE: {
+    for (unsigned I = 0, E = (Inst.getNumOperands() - 1) / 2; I < E; ++I) { // Visit the (register, sub-register index) operand pairs starting at operand 1.
+      Register SrcReg = Inst.getOperand(1 + 2 * I).getReg();
+      auto SubReg = Inst.getOperand(1 + 2 * I + 1).getImm();
+      if (SrcReg.isVirtual() && RI.isVGPR(MRI, SrcReg) &&
+          MRI.constrainRegClass(SrcReg, &AMDGPU::VGPR_32RegClass) && // NOTE(review): evaluated before the SubReg test, so any side effect of the call happens even when that test fails - confirm this is intended.
+          (SubReg == AMDGPU::lo16 || SubReg == AMDGPU::hi16)) {
+        Inst.getOperand(1 + 2 * I).setSubReg(AMDGPU::lo16); // NOTE(review): replaces whatever sub-register index the operand already carried (e.g. hi16) - confirm.
+      }
+    }
+  } break;
+  }
+}
+
 void SIInstrInfo::moveToVALU(SIInstrWorklist &Worklist,
                              MachineDominatorTree *MDT) const {
 
@@ -8083,6 +8147,9 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
       return;
     }
 
+    if (ST.useRealTrue16Insts())
+      legalizeSpecialInst_t16(Inst, MRI);
+
     if (Inst.isCopy() && Inst.getOperand(1).getReg().isVirtual() &&
         NewDstRC ==
RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) { // Instead of creating a copy where src and dst are the same register @@ -8105,38 +8172,6 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, return; } - // If this is a v2s copy between 16bit and 32bit reg, - // replace vgpr copy to reg_sequence/extract_subreg - // This can be remove after we have sgpr16 in place - if (ST.useRealTrue16Insts() && Inst.isCopy() && - Inst.getOperand(1).getReg().isVirtual() && - RI.isVGPR(MRI, Inst.getOperand(1).getReg())) { - const TargetRegisterClass *SrcRegRC = getOpRegClass(Inst, 1); - if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) { - Register NewDstReg = MRI.createVirtualRegister(NewDstRC); - Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass); - BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), - get(AMDGPU::IMPLICIT_DEF), Undef); - BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(), - get(AMDGPU::REG_SEQUENCE), NewDstReg) - .addReg(Inst.getOperand(1).getReg()) - .addImm(AMDGPU::lo16) - .addReg(Undef) - .addImm(AMDGPU::hi16); - Inst.eraseFromParent(); - MRI.replaceRegWith(DstReg, NewDstReg); - addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); - return; - } else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC, - AMDGPU::lo16)) { - Inst.getOperand(1).setSubReg(AMDGPU::lo16); - Register NewDstReg = MRI.createVirtualRegister(NewDstRC); - MRI.replaceRegWith(DstReg, NewDstReg); - addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); - return; - } - } - Register NewDstReg = MRI.createVirtualRegister(NewDstRC); MRI.replaceRegWith(DstReg, NewDstReg); legalizeOperands(Inst, MDT); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 31a2d55e1baad..9728ed4b6e002 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1375,6 +1375,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { MachineRegisterInfo &MRI) const; void 
legalizeOperandsVALUt16(MachineInstr &Inst, unsigned OpIdx, MachineRegisterInfo &MRI) const; + void legalizeSpecialInst_t16(MachineInstr &Inst, + MachineRegisterInfo &MRI) const; /// Replace the instructions opcode with the equivalent VALU /// opcode. This function will also move the users of MachineInstruntions diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll index 47cb6bd3b3bb6..03672fb760586 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll @@ -4913,12 +4913,10 @@ define inreg <20 x i32> @bitcast_v40i16_to_v20i32_scalar(<40 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v40i16_to_v20i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v1.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v0.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v1 ; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 @@ -8342,12 +8340,10 @@ define inreg <20 x i32> @bitcast_v40f16_to_v20i32_scalar(<40 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v40f16_to_v20i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v1.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v0.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v1 ; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 @@ -12629,12 +12625,10 @@ define inreg <20 x float> 
@bitcast_v40i16_to_v20f32_scalar(<40 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v40i16_to_v20f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v1.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v0.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v1 ; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 @@ -16043,12 +16037,10 @@ define inreg <20 x float> @bitcast_v40f16_to_v20f32_scalar(<40 x half> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v40f16_to_v20f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v1.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v0.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v1 ; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 @@ -19655,12 +19647,10 @@ define inreg <10 x i64> @bitcast_v40i16_to_v10i64_scalar(<40 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v40i16_to_v10i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v1.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v0.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v1 ; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 @@ -23094,12 
+23084,10 @@ define inreg <10 x i64> @bitcast_v40f16_to_v10i64_scalar(<40 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v40f16_to_v10i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v1.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v0.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v1 ; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 @@ -25911,12 +25899,10 @@ define inreg <10 x double> @bitcast_v40i16_to_v10f64_scalar(<40 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v40i16_to_v10f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v1.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v0.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v1 ; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 @@ -29258,12 +29244,10 @@ define inreg <10 x double> @bitcast_v40f16_to_v10f64_scalar(<40 x half> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v40f16_to_v10f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v1.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v0.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff, v0 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v34, 0xffff, v1 ; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s29, 16 ; 
GFX11-TRUE16-NEXT: s_lshr_b32 s42, s28, 16 @@ -31057,12 +31041,10 @@ define inreg <40 x half> @bitcast_v40i16_to_v40f16_scalar(<40 x i16> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v40i16_to_v40f16_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v1.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v0.h ; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s29, 16 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v19.h ; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s28, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s27, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s26, 16 @@ -31074,12 +31056,12 @@ define inreg <40 x half> @bitcast_v40i16_to_v40f16_scalar(<40 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s20, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s19, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s17, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s16, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s3, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s2, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s0, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s0, 16 ; GFX11-TRUE16-NEXT: s_mov_b32 s46, 0 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB57_3 @@ -31103,11 +31085,11 @@ define inreg <40 x half> @bitcast_v40i16_to_v40f16_scalar(<40 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s20, s12 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s19, s11 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s18, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s17, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s17, s9 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s16, s7 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s6 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 
s0, s0, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s8 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s4 ; GFX11-TRUE16-NEXT: v_pk_add_u16 v15, s29, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v16, s28, 3 op_sel_hi:[1,0] @@ -31123,7 +31105,7 @@ define inreg <40 x half> @bitcast_v40i16_to_v40f16_scalar(<40 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s12, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s11, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s10, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s8, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s9, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v21, s0, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v20, s1, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s2, 3 op_sel_hi:[1,0] @@ -31168,9 +31150,9 @@ define inreg <40 x half> @bitcast_v40i16_to_v40f16_scalar(<40 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, s15 :: v_dual_mov_b32 v29, s14 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s13 :: v_dual_mov_b32 v31, s12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s11 :: v_dual_mov_b32 v33, s10 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s8 :: v_dual_mov_b32 v35, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s9 :: v_dual_mov_b32 v35, s7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s6 :: v_dual_mov_b32 v37, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s4 :: v_dual_mov_b32 v39, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s4 :: v_dual_mov_b32 v39, s8 ; GFX11-TRUE16-NEXT: .LBB57_5: ; %end ; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff, v4 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v49, 0xffff, v2 @@ -32879,12 +32861,10 @@ define inreg <40 x i16> @bitcast_v40f16_to_v40i16_scalar(<40 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v40f16_to_v40i16_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 ; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v1.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v0.h ; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s29, 16 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v19.h ; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s28, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s27, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s26, 16 @@ -32896,12 +32876,12 @@ define inreg <40 x i16> @bitcast_v40f16_to_v40i16_scalar(<40 x half> inreg %a, i ; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s20, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s19, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s18, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s17, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s17, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s16, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s3, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s2, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s0, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s0, 16 ; GFX11-TRUE16-NEXT: s_mov_b32 s46, 0 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB59_3 @@ -32925,11 +32905,11 @@ define inreg <40 x i16> @bitcast_v40f16_to_v40i16_scalar(<40 x half> inreg %a, i ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s20, s12 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s19, s11 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s18, s10 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s17, s8 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s17, s9 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s16, s7 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s6 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s9 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s8 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s4 ; GFX11-TRUE16-NEXT: v_pk_add_f16 v15, 0x200, s29 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v16, 0x200, s28 op_sel_hi:[0,1] @@ -32945,7 +32925,7 @@ define inreg <40 x i16> @bitcast_v40f16_to_v40i16_scalar(<40 x half> inreg %a, i ; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 
0x200, s12 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s11 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s10 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s8 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s9 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v21, 0x200, s0 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v20, 0x200, s1 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s2 op_sel_hi:[0,1] @@ -32990,9 +32970,9 @@ define inreg <40 x i16> @bitcast_v40f16_to_v40i16_scalar(<40 x half> inreg %a, i ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, s15 :: v_dual_mov_b32 v29, s14 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s13 :: v_dual_mov_b32 v31, s12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s11 :: v_dual_mov_b32 v33, s10 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s8 :: v_dual_mov_b32 v35, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s9 :: v_dual_mov_b32 v35, s7 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s6 :: v_dual_mov_b32 v37, s5 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s4 :: v_dual_mov_b32 v39, s9 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s4 :: v_dual_mov_b32 v39, s8 ; GFX11-TRUE16-NEXT: .LBB59_5: ; %end ; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff, v4 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v49, 0xffff, v2 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll index 2cc7c448b2e11..afb2e96a0079f 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll @@ -5328,15 +5328,11 @@ define inreg <22 x i32> @bitcast_v44i16_to_v22i32_scalar(<44 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v44i16_to_v22i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v3.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, 
v2.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v39, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff, v2 @@ -9137,15 +9133,11 @@ define inreg <22 x i32> @bitcast_v44f16_to_v22i32_scalar(<44 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v44f16_to_v22i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v3.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v2.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v39, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff, v2 @@ -13805,15 +13797,11 @@ define inreg <22 x float> @bitcast_v44i16_to_v22f32_scalar(<44 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v44i16_to_v22f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v3.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v2.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v39, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 
0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff, v2 @@ -17607,15 +17595,11 @@ define inreg <22 x float> @bitcast_v44f16_to_v22f32_scalar(<44 x half> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v44f16_to_v22f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v3.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v2.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v39, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff, v2 @@ -21568,15 +21552,11 @@ define inreg <11 x i64> @bitcast_v44i16_to_v11i64_scalar(<44 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v44i16_to_v11i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v3.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v2.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v39, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff, v2 @@ -25389,15 +25369,11 @@ define inreg <11 x i64> @bitcast_v44f16_to_v11i64_scalar(<44 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v44f16_to_v11i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: 
v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v3.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v2.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v39, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff, v2 @@ -28498,15 +28474,11 @@ define inreg <11 x double> @bitcast_v44i16_to_v11f64_scalar(<44 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v44i16_to_v11f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v3.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v2.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v39, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff, v2 @@ -32224,15 +32196,11 @@ define inreg <11 x double> @bitcast_v44f16_to_v11f64_scalar(<44 x half> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v44f16_to_v11f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v3.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v2.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v39, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff, v2 @@ -34283,15 +34251,11 @@ define inreg <44 x half> @bitcast_v44i16_to_v44f16_scalar(<44 x i16> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v44i16_to_v44f16_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v3.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v2.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v21.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v21.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v21.h ; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s28, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s27, 16 @@ -34301,15 +34265,15 @@ define inreg <44 x half> @bitcast_v44i16_to_v44f16_scalar(<44 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s23, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s22, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s20, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s19, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s18, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s2, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s0, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s0, 16 ; GFX11-TRUE16-NEXT: s_mov_b32 s46, 0 ; GFX11-TRUE16-NEXT: s_and_b32 
s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB57_3 @@ -34334,14 +34298,14 @@ define inreg <44 x half> @bitcast_v44i16_to_v44f16_scalar(<44 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s23, s15 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s22, s14 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s21, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s20, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s20, s12 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s19, s10 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s18, s9 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s17, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s16, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s16, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s11 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s4 ; GFX11-TRUE16-NEXT: v_pk_add_u16 v3, v3, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v2, v2, 3 op_sel_hi:[1,0] @@ -34356,7 +34320,7 @@ define inreg <44 x half> @bitcast_v44i16_to_v44f16_scalar(<44 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s15, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, s14, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s13, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s11, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s12, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s10, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, s9, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, s8, 3 op_sel_hi:[1,0] @@ -34364,7 +34328,7 @@ define inreg <44 x half> @bitcast_v44i16_to_v44f16_scalar(<44 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: v_pk_add_u16 v24, s1, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v23, s2, 3 op_sel_hi:[1,0] 
; GFX11-TRUE16-NEXT: v_pk_add_u16 v22, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s7, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v25 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v24 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v23 @@ -34404,11 +34368,11 @@ define inreg <44 x half> @bitcast_v44i16_to_v44f16_scalar(<44 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, s43 :: v_dual_mov_b32 v29, s42 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s41 :: v_dual_mov_b32 v31, s40 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s15 :: v_dual_mov_b32 v33, s14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s13 :: v_dual_mov_b32 v35, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s13 :: v_dual_mov_b32 v35, s12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s10 :: v_dual_mov_b32 v37, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s8 :: v_dual_mov_b32 v39, s6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s5 :: v_dual_mov_b32 v49, s7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s4 :: v_dual_mov_b32 v51, s12 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s8 :: v_dual_mov_b32 v39, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s6 :: v_dual_mov_b32 v49, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s4 :: v_dual_mov_b32 v51, s11 ; GFX11-TRUE16-NEXT: .LBB57_5: ; %end ; GFX11-TRUE16-NEXT: v_and_b32_e32 v25, 0xffff, v25 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v52, 0xffff, v24 @@ -36279,15 +36243,11 @@ define inreg <44 x i16> @bitcast_v44f16_to_v44i16_scalar(<44 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v44f16_to_v44i16_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v3.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v2.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v21.h -; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v21.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v21.h ; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s28, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s27, 16 @@ -36297,15 +36257,15 @@ define inreg <44 x i16> @bitcast_v44f16_to_v44i16_scalar(<44 x half> inreg %a, i ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s23, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s22, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s21, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s20, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s20, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s19, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s18, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s2, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s0, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s0, 16 ; GFX11-TRUE16-NEXT: s_mov_b32 s46, 0 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB59_3 @@ -36330,14 +36290,14 @@ define inreg <44 x i16> @bitcast_v44f16_to_v44i16_scalar(<44 x half> inreg %a, i ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s23, s15 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s22, s14 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s21, s13 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s20, s11 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s20, s12 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s19, s10 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s18, s9 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s17, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s16, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s7 -; GFX11-TRUE16-NEXT: 
s_pack_ll_b32_b16 s0, s0, s12 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s16, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s11 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s4 ; GFX11-TRUE16-NEXT: v_pk_add_f16 v3, 0x200, v3 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, 0x200, v2 op_sel_hi:[0,1] @@ -36352,7 +36312,7 @@ define inreg <44 x i16> @bitcast_v44f16_to_v44i16_scalar(<44 x half> inreg %a, i ; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s15 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, s14 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s13 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s11 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s12 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s10 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, s9 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, s8 op_sel_hi:[0,1] @@ -36360,7 +36320,7 @@ define inreg <44 x i16> @bitcast_v44f16_to_v44i16_scalar(<44 x half> inreg %a, i ; GFX11-TRUE16-NEXT: v_pk_add_f16 v24, 0x200, s1 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v23, 0x200, s2 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v22, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s7 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v25 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v24 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v23 @@ -36400,11 +36360,11 @@ define inreg <44 x i16> @bitcast_v44f16_to_v44i16_scalar(<44 x half> inreg %a, i ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v28, s43 :: v_dual_mov_b32 v29, s42 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s41 :: v_dual_mov_b32 v31, s40 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s15 :: v_dual_mov_b32 v33, s14 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s13 :: 
v_dual_mov_b32 v35, s11 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s13 :: v_dual_mov_b32 v35, s12 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s10 :: v_dual_mov_b32 v37, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s8 :: v_dual_mov_b32 v39, s6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s5 :: v_dual_mov_b32 v49, s7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s4 :: v_dual_mov_b32 v51, s12 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s8 :: v_dual_mov_b32 v39, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s6 :: v_dual_mov_b32 v49, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s4 :: v_dual_mov_b32 v51, s11 ; GFX11-TRUE16-NEXT: .LBB59_5: ; %end ; GFX11-TRUE16-NEXT: v_and_b32_e32 v25, 0xffff, v25 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v52, 0xffff, v24 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll index c35e183fa787f..6dc830c9e38c1 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll @@ -5805,19 +5805,13 @@ define inreg <24 x i32> @bitcast_v48i16_to_v24i32_scalar(<48 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v48i16_to_v24i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v5.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v4.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v51, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v50, 0xffff, v1 ; GFX11-TRUE16-NEXT: 
v_and_b32_e32 v49, 0xffff, v2 @@ -10044,19 +10038,13 @@ define inreg <24 x i32> @bitcast_v48f16_to_v24i32_scalar(<48 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v48f16_to_v24i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v5.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v4.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v51, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v50, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v49, 0xffff, v2 @@ -15153,19 +15141,13 @@ define inreg <24 x float> @bitcast_v48i16_to_v24f32_scalar(<48 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v48i16_to_v24f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v5.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v4.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v51, 0xffff, v0 ; GFX11-TRUE16-NEXT: 
v_and_b32_e32 v50, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v49, 0xffff, v2 @@ -19382,19 +19364,13 @@ define inreg <24 x float> @bitcast_v48f16_to_v24f32_scalar(<48 x half> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v48f16_to_v24f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v5.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v4.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v51, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v50, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v49, 0xffff, v2 @@ -23764,19 +23740,13 @@ define inreg <12 x i64> @bitcast_v48i16_to_v12i64_scalar(<48 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v48i16_to_v12i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v5.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v4.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: 
v_and_b32_e32 v51, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v50, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v49, 0xffff, v2 @@ -28015,19 +27985,13 @@ define inreg <12 x i64> @bitcast_v48f16_to_v12i64_scalar(<48 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v48f16_to_v12i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v5.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v4.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v51, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v50, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v49, 0xffff, v2 @@ -31492,19 +31456,13 @@ define inreg <12 x double> @bitcast_v48i16_to_v12f64_scalar(<48 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v48i16_to_v12f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v5.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v4.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v0.h -; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v51, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v50, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v49, 0xffff, v2 @@ -35639,19 +35597,13 @@ define inreg <12 x double> @bitcast_v48f16_to_v12f64_scalar(<48 x half> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v48f16_to_v12f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v5.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v4.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v51, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v50, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v49, 0xffff, v2 @@ -37964,19 +37916,13 @@ define inreg <48 x half> @bitcast_v48i16_to_v48f16_scalar(<48 x i16> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v48i16_to_v48f16_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.l, v5.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v4.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v23.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v23.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v23.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v23.h 
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v23.h ; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s28, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s27, 16 @@ -37984,17 +37930,17 @@ define inreg <48 x half> @bitcast_v48i16_to_v48f16_scalar(<48 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s25, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s24, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s22, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s21, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s20, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s19, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s18, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s2, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s0, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s0, 16 ; GFX11-TRUE16-NEXT: s_mov_b32 s46, 0 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB57_3 @@ -38021,16 +37967,16 @@ define inreg <48 x half> @bitcast_v48i16_to_v48f16_scalar(<48 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s25, s25, s41 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s24, s24, s40 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s23, s15 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s22, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s22, s14 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s21, s12 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s20, s11 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s19, s10 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s18, s9 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s17, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 
s6, s16, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s16, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s13 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s4 ; GFX11-TRUE16-NEXT: v_pk_add_u16 v5, v5, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v4, v4, 3 op_sel_hi:[1,0] @@ -38045,7 +37991,7 @@ define inreg <48 x half> @bitcast_v48i16_to_v48f16_scalar(<48 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, s25, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, s24, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, s15, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s13, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v12, s14, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v13, s12, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, s11, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, s10, 3 op_sel_hi:[1,0] @@ -38055,7 +38001,7 @@ define inreg <48 x half> @bitcast_v48i16_to_v48f16_scalar(<48 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: v_pk_add_u16 v26, s1, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v25, s2, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v24, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v29, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v29, s7, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v27 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v26 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v25 @@ -38096,12 +38042,12 @@ define inreg <48 x half> @bitcast_v48i16_to_v48f16_scalar(<48 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s45 :: v_dual_mov_b32 v31, s44 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s43 :: v_dual_mov_b32 v33, s42 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 
v34, s41 :: v_dual_mov_b32 v35, s40 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s15 :: v_dual_mov_b32 v37, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s15 :: v_dual_mov_b32 v37, s14 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s12 :: v_dual_mov_b32 v39, s11 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s10 :: v_dual_mov_b32 v49, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s8 :: v_dual_mov_b32 v51, s6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s5 :: v_dual_mov_b32 v53, s7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s4 :: v_dual_mov_b32 v55, s14 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s8 :: v_dual_mov_b32 v51, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s6 :: v_dual_mov_b32 v53, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s4 :: v_dual_mov_b32 v55, s13 ; GFX11-TRUE16-NEXT: .LBB57_5: ; %end ; GFX11-TRUE16-NEXT: v_and_b32_e32 v27, 0xffff, v27 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v65, 0xffff, v24 @@ -40168,19 +40114,13 @@ define inreg <48 x i16> @bitcast_v48f16_to_v48i16_scalar(<48 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v48f16_to_v48i16_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.l, v5.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v4.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v23.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v23.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v23.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v23.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v23.h ; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s28, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s27, 16 @@ -40188,17 +40128,17 @@ define inreg <48 x i16> @bitcast_v48f16_to_v48i16_scalar(<48 x half> 
inreg %a, i ; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s25, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s24, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s23, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s22, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s22, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s12, s21, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s11, s20, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s19, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s18, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s2, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s0, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s0, 16 ; GFX11-TRUE16-NEXT: s_mov_b32 s46, 0 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB59_3 @@ -40225,16 +40165,16 @@ define inreg <48 x i16> @bitcast_v48f16_to_v48i16_scalar(<48 x half> inreg %a, i ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s25, s25, s41 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s24, s24, s40 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s23, s15 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s22, s13 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s22, s14 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s12, s21, s12 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s11, s20, s11 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s19, s10 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s18, s9 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s17, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s16, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s14 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s16, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s5 +; 
GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s13 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s4 ; GFX11-TRUE16-NEXT: v_pk_add_f16 v5, 0x200, v5 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v4, 0x200, v4 op_sel_hi:[0,1] @@ -40249,7 +40189,7 @@ define inreg <48 x i16> @bitcast_v48f16_to_v48i16_scalar(<48 x half> inreg %a, i ; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, s25 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, s24 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, s15 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s13 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v12, 0x200, s14 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v13, 0x200, s12 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, s11 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, s10 op_sel_hi:[0,1] @@ -40259,7 +40199,7 @@ define inreg <48 x i16> @bitcast_v48f16_to_v48i16_scalar(<48 x half> inreg %a, i ; GFX11-TRUE16-NEXT: v_pk_add_f16 v26, 0x200, s1 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v25, 0x200, s2 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v24, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v29, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v29, 0x200, s7 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v27 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v26 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v25 @@ -40300,12 +40240,12 @@ define inreg <48 x i16> @bitcast_v48f16_to_v48i16_scalar(<48 x half> inreg %a, i ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v30, s45 :: v_dual_mov_b32 v31, s44 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s43 :: v_dual_mov_b32 v33, s42 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s41 :: v_dual_mov_b32 v35, s40 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s15 :: v_dual_mov_b32 v37, s13 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s15 :: v_dual_mov_b32 v37, s14 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s12 :: v_dual_mov_b32 v39, s11 ; 
GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s10 :: v_dual_mov_b32 v49, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s8 :: v_dual_mov_b32 v51, s6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s5 :: v_dual_mov_b32 v53, s7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s4 :: v_dual_mov_b32 v55, s14 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s8 :: v_dual_mov_b32 v51, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s6 :: v_dual_mov_b32 v53, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s4 :: v_dual_mov_b32 v55, s13 ; GFX11-TRUE16-NEXT: .LBB59_5: ; %end ; GFX11-TRUE16-NEXT: v_and_b32_e32 v27, 0xffff, v27 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v65, 0xffff, v24 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll index 29005a42d8860..fb0d5ee292d86 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll @@ -6286,23 +6286,15 @@ define inreg <26 x i32> @bitcast_v52i16_to_v26i32_scalar(<52 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v52i16_to_v26i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v7.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v6.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: 
v_and_b32_e32 v55, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v54, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v53, 0xffff, v2 @@ -10946,23 +10938,15 @@ define inreg <26 x i32> @bitcast_v52f16_to_v26i32_scalar(<52 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v52f16_to_v26i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v7.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v6.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v55, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v54, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v53, 0xffff, v2 @@ -16527,23 +16511,15 @@ define inreg <26 x float> @bitcast_v52i16_to_v26f32_scalar(<52 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v52i16_to_v26f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v7.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v6.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v4.h -; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v55, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v54, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v53, 0xffff, v2 @@ -21183,23 +21159,15 @@ define inreg <26 x float> @bitcast_v52f16_to_v26f32_scalar(<52 x half> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v52f16_to_v26f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v7.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v6.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v55, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v54, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v53, 0xffff, v2 @@ -25980,23 +25948,15 @@ define inreg <13 x i64> @bitcast_v52i16_to_v13i64_scalar(<52 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v52i16_to_v13i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; 
GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v7.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v6.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v55, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v54, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v53, 0xffff, v2 @@ -30655,23 +30615,15 @@ define inreg <13 x i64> @bitcast_v52f16_to_v13i64_scalar(<52 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v52f16_to_v13i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v7.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v6.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v55, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v54, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v53, 0xffff, v2 @@ -34516,23 +34468,15 @@ define inreg <13 x double> @bitcast_v52i16_to_v13f64_scalar(<52 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v52i16_to_v13f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v7.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v6.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v55, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v54, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v53, 0xffff, v2 @@ -39081,23 +39025,15 @@ define inreg <13 x double> @bitcast_v52f16_to_v13f64_scalar(<52 x half> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v52f16_to_v13f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v7.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v6.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h 
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v55, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v54, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v53, 0xffff, v2 @@ -41806,28 +41742,20 @@ define inreg <52 x half> @bitcast_v52i16_to_v52f16_scalar(<52 x i16> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v52i16_to_v52f16_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.l, v7.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.l, v6.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v25.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v25.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v25.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v25.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v25.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v25.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v25.h ; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s28, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s27, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s25, 16 +; GFX11-TRUE16-NEXT: 
s_lshr_b32 s41, s25, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s24, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s23, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s22, 16 @@ -41836,11 +41764,11 @@ define inreg <52 x half> @bitcast_v52i16_to_v52f16_scalar(<52 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s19, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s18, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s2, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s0, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 ; GFX11-TRUE16-NEXT: s_mov_b32 s46, 0 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB57_3 @@ -41868,7 +41796,7 @@ define inreg <52 x half> @bitcast_v52i16_to_v52f16_scalar(<52 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s28, s28, s44 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s27, s27, s43 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s26, s26, s42 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s25, s25, s40 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s25, s25, s41 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s24, s15 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s23, s14 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s22, s13 @@ -41877,10 +41805,10 @@ define inreg <52 x half> @bitcast_v52i16_to_v52f16_scalar(<52 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s19, s10 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s18, s9 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s17, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s16, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s41 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s16, s7 +; 
GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s40 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s4 ; GFX11-TRUE16-NEXT: v_pk_add_u16 v7, v7, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v6, v6, 3 op_sel_hi:[1,0] @@ -41907,7 +41835,7 @@ define inreg <52 x half> @bitcast_v52i16_to_v52f16_scalar(<52 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: v_pk_add_u16 v32, s1, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v29, s2, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v28, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v27, s7, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v67, 16, v33 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v66, 16, v32 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v65, 16, v29 @@ -41949,13 +41877,13 @@ define inreg <52 x half> @bitcast_v52i16_to_v52f16_scalar(<52 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s1 :: v_dual_mov_b32 v33, s0 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s45 :: v_dual_mov_b32 v35, s44 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s43 :: v_dual_mov_b32 v37, s42 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s40 :: v_dual_mov_b32 v39, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s41 :: v_dual_mov_b32 v39, s15 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s14 :: v_dual_mov_b32 v49, s13 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s12 :: v_dual_mov_b32 v51, s11 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s10 :: v_dual_mov_b32 v53, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s8 :: v_dual_mov_b32 v55, s6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, s5 :: v_dual_mov_b32 v65, s7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s4 :: v_dual_mov_b32 v67, s41 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s8 :: v_dual_mov_b32 v55, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, s6 :: v_dual_mov_b32 v65, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s4 :: v_dual_mov_b32 v67, s40 
; GFX11-TRUE16-NEXT: .LBB57_5: ; %end ; GFX11-TRUE16-NEXT: v_and_b32_e32 v33, 0xffff, v33 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v68, 0xffff, v32 @@ -44258,28 +44186,20 @@ define inreg <52 x i16> @bitcast_v52f16_to_v52i16_scalar(<52 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v52f16_to_v52i16_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.l, v7.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.l, v6.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v25.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v25.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v25.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v25.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v25.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v25.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v25.h ; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s28, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s27, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s26, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s25, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s25, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s24, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s14, s23, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s13, s22, 16 @@ -44288,11 +44208,11 @@ define inreg <52 x i16> @bitcast_v52f16_to_v52i16_scalar(<52 x half> inreg %a, i ; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s19, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s18, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s2, 16 +; 
GFX11-TRUE16-NEXT: s_lshr_b32 s7, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s2, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s0, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s0, 16 ; GFX11-TRUE16-NEXT: s_mov_b32 s46, 0 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB59_3 @@ -44320,7 +44240,7 @@ define inreg <52 x i16> @bitcast_v52f16_to_v52i16_scalar(<52 x half> inreg %a, i ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s28, s28, s44 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s27, s27, s43 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s26, s26, s42 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s25, s25, s40 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s25, s25, s41 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s24, s15 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s14, s23, s14 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s13, s22, s13 @@ -44329,10 +44249,10 @@ define inreg <52 x i16> @bitcast_v52f16_to_v52i16_scalar(<52 x half> inreg %a, i ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s19, s10 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s18, s9 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s17, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s16, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s41 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s16, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s40 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s4 ; GFX11-TRUE16-NEXT: v_pk_add_f16 v7, 0x200, v7 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v6, 0x200, v6 op_sel_hi:[0,1] @@ -44359,7 +44279,7 @@ define inreg <52 x i16> @bitcast_v52f16_to_v52i16_scalar(<52 x half> inreg %a, i ; GFX11-TRUE16-NEXT: v_pk_add_f16 v32, 0x200, s1 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v29, 0x200, s2 
op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v28, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v27, 0x200, s7 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v67, 16, v33 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v66, 16, v32 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v65, 16, v29 @@ -44401,13 +44321,13 @@ define inreg <52 x i16> @bitcast_v52f16_to_v52i16_scalar(<52 x half> inreg %a, i ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v32, s1 :: v_dual_mov_b32 v33, s0 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, s45 :: v_dual_mov_b32 v35, s44 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v36, s43 :: v_dual_mov_b32 v37, s42 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s40 :: v_dual_mov_b32 v39, s15 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s41 :: v_dual_mov_b32 v39, s15 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s14 :: v_dual_mov_b32 v49, s13 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s12 :: v_dual_mov_b32 v51, s11 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s10 :: v_dual_mov_b32 v53, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s8 :: v_dual_mov_b32 v55, s6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, s5 :: v_dual_mov_b32 v65, s7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s4 :: v_dual_mov_b32 v67, s41 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s8 :: v_dual_mov_b32 v55, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, s6 :: v_dual_mov_b32 v65, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s4 :: v_dual_mov_b32 v67, s40 ; GFX11-TRUE16-NEXT: .LBB59_5: ; %end ; GFX11-TRUE16-NEXT: v_and_b32_e32 v33, 0xffff, v33 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v68, 0xffff, v32 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll index 8ee5b966f40b8..8f0e2f74db866 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll @@ -6779,27 +6779,17 @@ define inreg <28 x i32> @bitcast_v56i16_to_v28i32_scalar(<56 x i16> inreg %a, i3 ; 
GFX11-TRUE16-LABEL: bitcast_v56i16_to_v28i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v9.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v8.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v67, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v66, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v65, 0xffff, v2 @@ -11885,27 +11875,17 @@ define inreg <28 x i32> @bitcast_v56f16_to_v28i32_scalar(<56 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v56f16_to_v28i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v9.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v8.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v67, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v66, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v65, 0xffff, v2 @@ -17915,27 +17895,17 @@ define inreg <28 x float> @bitcast_v56i16_to_v28f32_scalar(<56 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v56i16_to_v28f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v9.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v8.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v32.h ; 
GFX11-TRUE16-NEXT: v_and_b32_e32 v67, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v66, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v65, 0xffff, v2 @@ -23006,27 +22976,17 @@ define inreg <28 x float> @bitcast_v56f16_to_v28f32_scalar(<56 x half> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v56f16_to_v28f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v9.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v8.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v67, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v66, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v65, 0xffff, v2 @@ -28216,27 +28176,17 @@ define inreg <14 x i64> @bitcast_v56i16_to_v14i64_scalar(<56 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v56i16_to_v14i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v9.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, 
v8.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v67, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v66, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v65, 0xffff, v2 @@ -33336,27 +33286,17 @@ define inreg <14 x i64> @bitcast_v56f16_to_v14i64_scalar(<56 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v56f16_to_v14i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v9.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v8.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v2.h -; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v67, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v66, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v65, 0xffff, v2 @@ -37545,27 +37485,17 @@ define inreg <14 x double> @bitcast_v56i16_to_v14f64_scalar(<56 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v56i16_to_v14f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v9.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v8.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v67, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v66, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v65, 0xffff, v2 @@ -42545,27 +42475,17 @@ define inreg <14 x double> @bitcast_v56f16_to_v14f64_scalar(<56 x half> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v56f16_to_v14f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; 
GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v9.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v8.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v67, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v66, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v65, 0xffff, v2 @@ -45566,30 +45486,20 @@ define inreg <56 x half> @bitcast_v56i16_to_v56f16_scalar(<56 x i16> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v56i16_to_v56f16_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.l, v9.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.l, v8.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v27.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v27.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v27.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v27.h ; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v27.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v27.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v27.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v27.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v27.h ; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s27, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s26, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s25, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s24, 16 @@ -45600,11 +45510,11 @@ define inreg <56 x half> @bitcast_v56i16_to_v56f16_scalar(<56 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s19, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s18, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s2, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s0, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s0, 16 ; GFX11-TRUE16-NEXT: s_mov_b32 s46, 0 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB57_3 @@ -45634,7 +45544,7 @@ define inreg <56 x half> @bitcast_v56i16_to_v56f16_scalar(<56 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v18, 16, v0 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s29, s29, s45 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s28, s28, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s27, s27, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s27, s27, s43 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s26, s26, s41 ; 
GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s25, s25, s40 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s24, s15 @@ -45645,10 +45555,10 @@ define inreg <56 x half> @bitcast_v56i16_to_v56f16_scalar(<56 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s19, s10 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s18, s9 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s17, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s16, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s16, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s42 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s4 ; GFX11-TRUE16-NEXT: v_pk_add_u16 v9, v9, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v8, v8, 3 op_sel_hi:[1,0] @@ -45677,7 +45587,7 @@ define inreg <56 x half> @bitcast_v56i16_to_v56f16_scalar(<56 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: v_pk_add_u16 v31, s1, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v30, s2, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v37, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v36, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v36, s7, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v71, 16, v32 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v70, 16, v31 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v69, 16, v30 @@ -45720,14 +45630,14 @@ define inreg <56 x half> @bitcast_v56i16_to_v56f16_scalar(<56 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, s3 :: v_dual_mov_b32 v30, s2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s1 :: v_dual_mov_b32 v32, s0 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s45 :: v_dual_mov_b32 v39, s44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s42 :: v_dual_mov_b32 v49, s41 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s43 :: v_dual_mov_b32 v49, s41 ; 
GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s40 :: v_dual_mov_b32 v51, s15 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s14 :: v_dual_mov_b32 v53, s13 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s12 :: v_dual_mov_b32 v55, s11 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, s10 :: v_dual_mov_b32 v65, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s8 :: v_dual_mov_b32 v67, s6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v68, s5 :: v_dual_mov_b32 v69, s7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v70, s4 :: v_dual_mov_b32 v71, s43 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s8 :: v_dual_mov_b32 v67, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v68, s6 :: v_dual_mov_b32 v69, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v70, s4 :: v_dual_mov_b32 v71, s42 ; GFX11-TRUE16-NEXT: .LBB57_5: ; %end ; GFX11-TRUE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff, v37 @@ -48280,30 +48190,20 @@ define inreg <56 x i16> @bitcast_v56f16_to_v56i16_scalar(<56 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v56f16_to_v56i16_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v10 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.l, v9.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.l, v8.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v27.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v27.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v27.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v27.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v27.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v27.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v27.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v1.h -; GFX11-TRUE16-NEXT: 
v_mov_b16_e32 v19.h, v27.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v27.h ; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s28, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s27, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s27, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s26, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s40, s25, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s15, s24, 16 @@ -48314,11 +48214,11 @@ define inreg <56 x i16> @bitcast_v56f16_to_v56i16_scalar(<56 x half> inreg %a, i ; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s19, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s18, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s2, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s0, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s0, 16 ; GFX11-TRUE16-NEXT: s_mov_b32 s46, 0 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB59_3 @@ -48348,7 +48248,7 @@ define inreg <56 x i16> @bitcast_v56f16_to_v56i16_scalar(<56 x half> inreg %a, i ; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v18, 16, v0 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s29, s29, s45 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s28, s28, s44 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s27, s27, s42 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s27, s27, s43 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s26, s26, s41 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s25, s25, s40 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s15, s24, s15 @@ -48359,10 +48259,10 @@ define inreg <56 x i16> @bitcast_v56f16_to_v56i16_scalar(<56 x half> inreg %a, i ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s19, s10 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s18, s9 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, 
s17, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s16, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s43 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s16, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s42 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s4 ; GFX11-TRUE16-NEXT: v_pk_add_f16 v9, 0x200, v9 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v8, 0x200, v8 op_sel_hi:[0,1] @@ -48391,7 +48291,7 @@ define inreg <56 x i16> @bitcast_v56f16_to_v56i16_scalar(<56 x half> inreg %a, i ; GFX11-TRUE16-NEXT: v_pk_add_f16 v31, 0x200, s1 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v30, 0x200, s2 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v37, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v36, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v36, 0x200, s7 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v71, 16, v32 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v70, 16, v31 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v69, 16, v30 @@ -48434,14 +48334,14 @@ define inreg <56 x i16> @bitcast_v56f16_to_v56i16_scalar(<56 x half> inreg %a, i ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, s3 :: v_dual_mov_b32 v30, s2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v31, s1 :: v_dual_mov_b32 v32, s0 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v38, s45 :: v_dual_mov_b32 v39, s44 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s42 :: v_dual_mov_b32 v49, s41 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v48, s43 :: v_dual_mov_b32 v49, s41 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s40 :: v_dual_mov_b32 v51, s15 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s14 :: v_dual_mov_b32 v53, s13 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s12 :: v_dual_mov_b32 v55, s11 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, s10 :: v_dual_mov_b32 v65, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s8 :: 
v_dual_mov_b32 v67, s6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v68, s5 :: v_dual_mov_b32 v69, s7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v70, s4 :: v_dual_mov_b32 v71, s43 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s8 :: v_dual_mov_b32 v67, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v68, s6 :: v_dual_mov_b32 v69, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v70, s4 :: v_dual_mov_b32 v71, s42 ; GFX11-TRUE16-NEXT: .LBB59_5: ; %end ; GFX11-TRUE16-NEXT: v_and_b32_e32 v31, 0xffff, v31 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v37, 0xffff, v37 diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll index 967f1a9b442b0..e663be7a6210e 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll @@ -7240,31 +7240,19 @@ define inreg <30 x i32> @bitcast_v60i16_to_v30i32_scalar(<60 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v60i16_to_v30i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v11.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v10.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v9.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v8.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v2.h 
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v50.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v50.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v71, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v70, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v69, 0xffff, v2 @@ -12840,31 +12828,19 @@ define inreg <30 x i32> @bitcast_v60f16_to_v30i32_scalar(<60 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v60f16_to_v30i32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v11.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v10.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v9.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v8.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v50.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v50.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v71, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v70, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v69, 0xffff, v2 
@@ -19290,31 +19266,19 @@ define inreg <30 x float> @bitcast_v60i16_to_v30f32_scalar(<60 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v60i16_to_v30f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v11.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v10.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v9.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v8.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v50.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v50.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v71, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v70, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v69, 0xffff, v2 @@ -24867,31 +24831,19 @@ define inreg <30 x float> @bitcast_v60f16_to_v30f32_scalar(<60 x half> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v60f16_to_v30f32_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v11.h ; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v10.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v9.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v8.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v50.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v50.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v71, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v70, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v69, 0xffff, v2 @@ -30472,31 +30424,19 @@ define inreg <15 x i64> @bitcast_v60i16_to_v15i64_scalar(<60 x i16> inreg %a, i3 ; GFX11-TRUE16-LABEL: bitcast_v60i16_to_v15i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v11.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v10.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v9.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v8.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v50.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v50.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v71, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v70, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v69, 0xffff, v2 @@ -36089,31 +36029,19 @@ define inreg <15 x i64> @bitcast_v60f16_to_v15i64_scalar(<60 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v60f16_to_v15i64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v11.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v10.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v9.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v8.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v32.h ; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v50.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v50.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v71, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v70, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v69, 0xffff, v2 @@ -40632,31 +40560,19 @@ define inreg <15 x double> @bitcast_v60i16_to_v15f64_scalar(<60 x i16> inreg %a, ; GFX11-TRUE16-LABEL: bitcast_v60i16_to_v15f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v11.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v10.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v9.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v8.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v50.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v50.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v71, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v70, 0xffff, v1 ; 
GFX11-TRUE16-NEXT: v_and_b32_e32 v69, 0xffff, v2 @@ -46109,31 +46025,19 @@ define inreg <15 x double> @bitcast_v60f16_to_v15f64_scalar(<60 x half> inreg %a ; GFX11-TRUE16-LABEL: bitcast_v60f16_to_v15f64_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v11.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.l, v10.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.l, v9.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v33.h, v32.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v34.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v8.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v36.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v37.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v39.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v48.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v50.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v50.h, v32.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.h, v32.h ; GFX11-TRUE16-NEXT: v_and_b32_e32 v71, 0xffff, v0 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v70, 0xffff, v1 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v69, 0xffff, v2 @@ -49421,32 +49325,20 @@ define inreg <60 x half> @bitcast_v60i16_to_v60f16_scalar(<60 x i16> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v60i16_to_v60f16_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.l, v11.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.l, v10.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.l, v9.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v29.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v29.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.l, v8.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v29.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v29.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v29.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v29.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v29.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v29.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v29.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v29.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v29.h -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s28, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s27, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s26, 16 @@ -49459,11 +49351,11 @@ define inreg <60 x half> @bitcast_v60i16_to_v60f16_scalar(<60 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s19, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s18, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s2, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s0, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s0, 16 ; GFX11-TRUE16-NEXT: 
s_mov_b32 s46, 0 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB57_3 @@ -49495,7 +49387,7 @@ define inreg <60 x half> @bitcast_v60i16_to_v60f16_scalar(<60 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: v_lshl_or_b32 v2, v20, 16, v2 ; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, v19, 16, v1 ; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v18, 16, v0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s29, s29, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s29, s29, s45 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s28, s28, s43 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s27, s27, s42 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s26, s26, s41 @@ -49508,10 +49400,10 @@ define inreg <60 x half> @bitcast_v60i16_to_v60f16_scalar(<60 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s19, s10 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s18, s9 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s17, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s16, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s16, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s44 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s4 ; GFX11-TRUE16-NEXT: v_pk_add_u16 v11, v11, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v10, v10, 3 op_sel_hi:[1,0] @@ -49542,7 +49434,7 @@ define inreg <60 x half> @bitcast_v60i16_to_v60f16_scalar(<60 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: v_pk_add_u16 v39, s1, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v38, s2, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_pk_add_u16 v37, s3, 3 op_sel_hi:[1,0] -; GFX11-TRUE16-NEXT: v_pk_add_u16 v36, s6, 3 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_pk_add_u16 v36, s7, 3 op_sel_hi:[1,0] ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v83, 16, v48 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v82, 16, 
v39 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v81, 16, v38 @@ -49586,15 +49478,15 @@ define inreg <60 x half> @bitcast_v60i16_to_v60f16_scalar(<60 x i16> inreg %a, i ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, s17 :: v_dual_mov_b32 v36, s16 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, s3 :: v_dual_mov_b32 v38, s2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, s1 :: v_dual_mov_b32 v48, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s44 :: v_dual_mov_b32 v51, s43 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s45 :: v_dual_mov_b32 v51, s43 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s42 :: v_dual_mov_b32 v53, s41 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s40 :: v_dual_mov_b32 v55, s15 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, s14 :: v_dual_mov_b32 v65, s13 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s12 :: v_dual_mov_b32 v67, s11 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v68, s10 :: v_dual_mov_b32 v69, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v70, s8 :: v_dual_mov_b32 v71, s6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v80, s5 :: v_dual_mov_b32 v81, s7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v82, s4 :: v_dual_mov_b32 v83, s45 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v70, s8 :: v_dual_mov_b32 v71, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v80, s6 :: v_dual_mov_b32 v81, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v82, s4 :: v_dual_mov_b32 v83, s44 ; GFX11-TRUE16-NEXT: .LBB57_5: ; %end ; GFX11-TRUE16-NEXT: v_and_b32_e32 v49, 0xffff, v49 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 @@ -52368,32 +52260,20 @@ define inreg <60 x i16> @bitcast_v60f16_to_v60i16_scalar(<60 x half> inreg %a, i ; GFX11-TRUE16-LABEL: bitcast_v60f16_to_v60i16_scalar: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.h, 0 ; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v12 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.l, v11.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.l, v10.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.l, v9.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.h, v29.h -; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.h, v29.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.l, v8.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.h, v29.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.l, v7.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.h, v29.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.l, v6.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.h, v29.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.l, v5.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.h, v29.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v4.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.h, v29.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v3.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.h, v29.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v2.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.h, v29.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v1.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.h, v29.h ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v0.h -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.h, v29.h -; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s29, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s29, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s43, s28, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s42, s27, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s41, s26, 16 @@ -52406,11 +52286,11 @@ define inreg <60 x i16> @bitcast_v60f16_to_v60i16_scalar(<60 x half> inreg %a, i ; GFX11-TRUE16-NEXT: s_lshr_b32 s10, s19, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s9, s18, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s8, s17, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s16, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s3, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s2, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s7, s16, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s6, s3, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s5, s2, 16 ; GFX11-TRUE16-NEXT: s_lshr_b32 s4, s1, 16 -; GFX11-TRUE16-NEXT: s_lshr_b32 s45, s0, 16 +; GFX11-TRUE16-NEXT: s_lshr_b32 s44, s0, 16 ; GFX11-TRUE16-NEXT: s_mov_b32 s46, 0 ; GFX11-TRUE16-NEXT: s_and_b32 s47, vcc_lo, exec_lo ; GFX11-TRUE16-NEXT: s_cbranch_scc0 .LBB59_3 @@ -52442,7 +52322,7 @@ define inreg <60 x i16> @bitcast_v60f16_to_v60i16_scalar(<60 x 
half> inreg %a, i ; GFX11-TRUE16-NEXT: v_lshl_or_b32 v2, v20, 16, v2 ; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, v19, 16, v1 ; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v18, 16, v0 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s29, s29, s44 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s29, s29, s45 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s28, s28, s43 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s27, s27, s42 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s26, s26, s41 @@ -52455,10 +52335,10 @@ define inreg <60 x i16> @bitcast_v60f16_to_v60i16_scalar(<60 x half> inreg %a, i ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s10, s19, s10 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s9, s18, s9 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s8, s17, s8 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s6, s16, s6 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s5 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s7 -; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s45 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s7, s16, s7 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s3, s3, s6 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s5 +; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s0, s0, s44 ; GFX11-TRUE16-NEXT: s_pack_ll_b32_b16 s1, s1, s4 ; GFX11-TRUE16-NEXT: v_pk_add_f16 v11, 0x200, v11 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v10, 0x200, v10 op_sel_hi:[0,1] @@ -52489,7 +52369,7 @@ define inreg <60 x i16> @bitcast_v60f16_to_v60i16_scalar(<60 x half> inreg %a, i ; GFX11-TRUE16-NEXT: v_pk_add_f16 v39, 0x200, s1 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v38, 0x200, s2 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_pk_add_f16 v37, 0x200, s3 op_sel_hi:[0,1] -; GFX11-TRUE16-NEXT: v_pk_add_f16 v36, 0x200, s6 op_sel_hi:[0,1] +; GFX11-TRUE16-NEXT: v_pk_add_f16 v36, 0x200, s7 op_sel_hi:[0,1] ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v83, 16, v48 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v82, 16, v39 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v81, 16, v38 @@ -52533,15 +52413,15 @@ define inreg <60 x i16> @bitcast_v60f16_to_v60i16_scalar(<60 x half> inreg %a, i ; 
GFX11-TRUE16-NEXT: v_dual_mov_b32 v49, s17 :: v_dual_mov_b32 v36, s16 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v37, s3 :: v_dual_mov_b32 v38, s2 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v39, s1 :: v_dual_mov_b32 v48, s0 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s44 :: v_dual_mov_b32 v51, s43 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v50, s45 :: v_dual_mov_b32 v51, s43 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v52, s42 :: v_dual_mov_b32 v53, s41 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v54, s40 :: v_dual_mov_b32 v55, s15 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v64, s14 :: v_dual_mov_b32 v65, s13 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v66, s12 :: v_dual_mov_b32 v67, s11 ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v68, s10 :: v_dual_mov_b32 v69, s9 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v70, s8 :: v_dual_mov_b32 v71, s6 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v80, s5 :: v_dual_mov_b32 v81, s7 -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v82, s4 :: v_dual_mov_b32 v83, s45 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v70, s8 :: v_dual_mov_b32 v71, s7 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v80, s6 :: v_dual_mov_b32 v81, s5 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v82, s4 :: v_dual_mov_b32 v83, s44 ; GFX11-TRUE16-NEXT: .LBB59_5: ; %end ; GFX11-TRUE16-NEXT: v_and_b32_e32 v49, 0xffff, v49 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir index 043bcc343d265..f15ac1d6374be 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir +++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir @@ -131,6 +131,34 @@ body: | %2:sreg_32 = S_TRUNC_F16 %1:sreg_32, implicit $mode ... 
+--- +name: copy_vgpr16_sreg32_lo16_usedby_salu16 +body: | + bb.0: + ; GCN-LABEL: name: copy_vgpr16_sreg32_lo16_usedby_salu16 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[DEF]].lo16, %subreg.lo16, [[DEF1]], %subreg.hi16 + ; GCN-NEXT: [[V_TRUNC_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_TRUNC_F16_t16_e64 0, [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec + %0:vgpr_32 = IMPLICIT_DEF + %1:sreg_32 = COPY %0.lo16:vgpr_32 + %2:sreg_32 = S_TRUNC_F16 %1:sreg_32, implicit $mode +... + +--- +name: copy_vgpr16_sreg32_hi16_usedby_salu16 +body: | + bb.0: + ; GCN-LABEL: name: copy_vgpr16_sreg32_hi16_usedby_salu16 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[DEF]].hi16, %subreg.lo16, [[DEF1]], %subreg.hi16 + ; GCN-NEXT: [[V_TRUNC_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_TRUNC_F16_t16_e64 0, [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec + %0:vgpr_32 = IMPLICIT_DEF + %1:sreg_32 = COPY %0.hi16:vgpr_32 + %2:sreg_32 = S_TRUNC_F16 %1:sreg_32, implicit $mode +... + --- name: copy_vgpr16_sreg32_usedby_salu32 body: | @@ -158,21 +186,17 @@ body: | ... 
--- -name: S_FMAC_F16 +name: reg_sequence_vgpr32_sreg32 body: | bb.0: - ; GCN-LABEL: name: S_FMAC_F16 + ; GCN-LABEL: name: reg_sequence_vgpr32_sreg32 ; GCN: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF - ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[DEF]], %subreg.lo16, [[DEF1]], %subreg.hi16 - ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[DEF]], %subreg.lo16, [[DEF2]], %subreg.hi16 - ; GCN-NEXT: [[V_FMAC_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_FMAC_F16_t16_e64 0, [[REG_SEQUENCE1]].lo16, 0, [[REG_SEQUENCE1]].lo16, 0, [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[DEF1]].lo16, %subreg.lo16, [[DEF]], %subreg.hi16 %0:vgpr_16 = IMPLICIT_DEF - %1:sgpr_lo16 = COPY %0:vgpr_16 - %2:sreg_32 = COPY %0:vgpr_16 - %3:sreg_32 = COPY %1:sgpr_lo16 - %4:sreg_32 = S_FMAC_F16 %3:sreg_32, %3:sreg_32, %2:sreg_32, implicit $mode + %1:vgpr_32 = IMPLICIT_DEF + %2:sreg_32 = COPY %1:vgpr_32 + %3:vgpr_32 = REG_SEQUENCE %2:sreg_32, %subreg.lo16, %0:vgpr_16, %subreg.hi16 ... 
--- diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll index 78a961ea0da17..5cc88a6d39071 100644 --- a/llvm/test/CodeGen/AMDGPU/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/frem.ll @@ -538,11 +538,10 @@ define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1 ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v3, v2 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB0_2 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %frem.else -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, 0 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v3, v2 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_bfi_b32 v4, 0x7fff, v5, v4 +; GFX11-TRUE16-NEXT: v_bfi_b32 v4, 0x7fff, v4, v1 ; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.l, v1.l, v4.l, vcc_lo ; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB0_3 ; GFX11-TRUE16-NEXT: s_branch .LBB0_8 @@ -622,10 +621,9 @@ define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo ; GFX11-TRUE16-NEXT: v_ldexp_f32 v2, v3, v2 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2 -; GFX11-TRUE16-NEXT: v_bfi_b32 v4, 0x7fff, v2, v3 +; GFX11-TRUE16-NEXT: v_bfi_b32 v4, 0x7fff, v2, v1 ; GFX11-TRUE16-NEXT: .LBB0_8: ; %Flow19 ; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v0.l ; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v1.l| @@ -770,12 +768,11 @@ define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1 ; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s1, s0 ; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB0_2 ; 
GFX1150-TRUE16-NEXT: ; %bb.1: ; %frem.else -; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l -; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0 +; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0 ; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s1, s0 ; GFX1150-TRUE16-NEXT: s_cselect_b32 s2, -1, 0 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v3, v2 +; GFX1150-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v2, v0 ; GFX1150-TRUE16-NEXT: v_cndmask_b16 v2.l, v0.l, v2.l, s2 ; GFX1150-TRUE16-NEXT: s_cbranch_execz .LBB0_3 ; GFX1150-TRUE16-NEXT: s_branch .LBB0_8 @@ -856,12 +853,11 @@ define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1 ; GFX1150-TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0, v4 ; GFX1150-TRUE16-NEXT: v_add_f32_e32 v3, v4, v3 ; GFX1150-TRUE16-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo -; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-TRUE16-NEXT: v_ldexp_f32 v2, v3, v2 -; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l ; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v2, v3 +; GFX1150-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v2, v0 ; GFX1150-TRUE16-NEXT: .LBB0_8: ; %Flow19 ; GFX1150-TRUE16-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x7fff, v0 ; GFX1150-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v1.l @@ -1015,12 +1011,11 @@ define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1 ; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s1, s0 ; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB0_2 ; GFX1200-TRUE16-NEXT: ; %bb.1: ; %frem.else -; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l -; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0 +; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0 ; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s1, s0 ; 
GFX1200-TRUE16-NEXT: s_cselect_b32 s2, -1, 0 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1200-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v3, v2 +; GFX1200-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v2, v0 ; GFX1200-TRUE16-NEXT: v_cndmask_b16 v2.l, v0.l, v2.l, s2 ; GFX1200-TRUE16-NEXT: s_cbranch_execz .LBB0_3 ; GFX1200-TRUE16-NEXT: s_branch .LBB0_8 @@ -1104,12 +1099,11 @@ define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1 ; GFX1200-TRUE16-NEXT: v_add_f32_e32 v3, v4, v3 ; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffd ; GFX1200-TRUE16-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo -; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_ldexp_f32 v2, v3, v2 -; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l ; GFX1200-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1200-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v2, v3 +; GFX1200-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v2, v0 ; GFX1200-TRUE16-NEXT: .LBB0_8: ; %Flow19 ; GFX1200-TRUE16-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x7fff, v0 ; GFX1200-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v1.l @@ -5783,11 +5777,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v4, v3 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB9_2 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %frem.else -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, 0 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v4, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v5, v2 +; GFX11-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v2, v0 ; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.l, v0.l, v2.l, 
vcc_lo ; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB9_3 ; GFX11-TRUE16-NEXT: s_branch .LBB9_8 @@ -5867,10 +5860,9 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc_lo ; GFX11-TRUE16-NEXT: v_ldexp_f32 v2, v3, v2 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v2.l, v2 -; GFX11-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v2, v3 +; GFX11-TRUE16-NEXT: v_bfi_b32 v2, 0x7fff, v2, v0 ; GFX11-TRUE16-NEXT: .LBB9_8: ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v1 @@ -5881,11 +5873,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v6, v5 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB9_10 ; GFX11-TRUE16-NEXT: ; %bb.9: ; %frem.else20 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v3.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, 0 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, 0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v6, v5 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_bfi_b32 v7, 0x7fff, v8, v7 +; GFX11-TRUE16-NEXT: v_bfi_b32 v7, 0x7fff, v7, v3 ; GFX11-TRUE16-NEXT: v_cndmask_b16 v7.l, v3.l, v7.l, vcc_lo ; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB9_11 ; GFX11-TRUE16-NEXT: s_branch .LBB9_16 @@ -5965,10 +5956,9 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc_lo ; GFX11-TRUE16-NEXT: v_ldexp_f32 v5, v6, v5 -; GFX11-TRUE16-NEXT: 
v_mov_b16_e32 v6.l, v3.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v5.l, v5 -; GFX11-TRUE16-NEXT: v_bfi_b32 v7, 0x7fff, v5, v6 +; GFX11-TRUE16-NEXT: v_bfi_b32 v7, 0x7fff, v5, v3 ; GFX11-TRUE16-NEXT: .LBB9_16: ; %Flow54 ; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v1.l ; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v0.l| @@ -8964,11 +8954,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v6, v5 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_2 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %frem.else -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, 0 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v6, v5 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_bfi_b32 v4, 0x7fff, v7, v4 +; GFX11-TRUE16-NEXT: v_bfi_b32 v4, 0x7fff, v4, v0 ; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.l, v0.l, v4.l, vcc_lo ; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB10_3 ; GFX11-TRUE16-NEXT: s_branch .LBB10_8 @@ -9048,10 +9037,9 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc_lo ; GFX11-TRUE16-NEXT: v_ldexp_f32 v4, v5, v4 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v4.l, v4 -; GFX11-TRUE16-NEXT: v_bfi_b32 v4, 0x7fff, v4, v5 +; GFX11-TRUE16-NEXT: v_bfi_b32 v4, 0x7fff, v4, v0 ; GFX11-TRUE16-NEXT: .LBB10_8: 
; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 @@ -9062,11 +9050,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v9, v8 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_10 ; GFX11-TRUE16-NEXT: ; %bb.9: ; %frem.else20 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v5.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, 0 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, 0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v9, v8 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_bfi_b32 v7, 0x7fff, v10, v7 +; GFX11-TRUE16-NEXT: v_bfi_b32 v7, 0x7fff, v7, v5 ; GFX11-TRUE16-NEXT: v_cndmask_b16 v7.l, v5.l, v7.l, vcc_lo ; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB10_11 ; GFX11-TRUE16-NEXT: s_branch .LBB10_16 @@ -9146,10 +9133,9 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc_lo ; GFX11-TRUE16-NEXT: v_ldexp_f32 v7, v8, v7 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v5.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v7.l, v7 -; GFX11-TRUE16-NEXT: v_bfi_b32 v7, 0x7fff, v7, v8 +; GFX11-TRUE16-NEXT: v_bfi_b32 v7, 0x7fff, v7, v5 ; GFX11-TRUE16-NEXT: .LBB10_16: ; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e64 v10, |v1.l| ; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e64 v9, |v3.l| @@ -9157,11 +9143,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v10, v9 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_18 ; GFX11-TRUE16-NEXT: ; %bb.17: ; %frem.else53 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v1.l -; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, 0 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, 0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v10, v9 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_bfi_b32 v8, 0x7fff, v11, v8 +; GFX11-TRUE16-NEXT: v_bfi_b32 v8, 0x7fff, v8, v1 ; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.l, v1.l, v8.l, vcc_lo ; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB10_19 ; GFX11-TRUE16-NEXT: s_branch .LBB10_24 @@ -9241,10 +9226,9 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc_lo ; GFX11-TRUE16-NEXT: v_ldexp_f32 v8, v9, v8 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v1.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v8.l, v8 -; GFX11-TRUE16-NEXT: v_bfi_b32 v8, 0x7fff, v8, v9 +; GFX11-TRUE16-NEXT: v_bfi_b32 v8, 0x7fff, v8, v1 ; GFX11-TRUE16-NEXT: .LBB10_24: ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v9, 16, v1 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v10, 16, v3 @@ -9255,11 +9239,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v13, v12 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_26 ; GFX11-TRUE16-NEXT: ; %bb.25: ; %frem.else86 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v9.l -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, 0 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, 0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v13, v12 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_bfi_b32 v11, 0x7fff, v14, v11 +; GFX11-TRUE16-NEXT: v_bfi_b32 v11, 0x7fff, v11, v9 ; GFX11-TRUE16-NEXT: v_cndmask_b16 v11.l, v9.l, v11.l, 
vcc_lo ; GFX11-TRUE16-NEXT: s_cbranch_execz .LBB10_27 ; GFX11-TRUE16-NEXT: s_branch .LBB10_32 @@ -9339,10 +9322,9 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v12, v13, v12, vcc_lo ; GFX11-TRUE16-NEXT: v_ldexp_f32 v11, v12, v11 -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v9.l -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v11.l, v11 -; GFX11-TRUE16-NEXT: v_bfi_b32 v11, 0x7fff, v11, v12 +; GFX11-TRUE16-NEXT: v_bfi_b32 v11, 0x7fff, v11, v9 ; GFX11-TRUE16-NEXT: .LBB10_32: ; %Flow124 ; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v2.l ; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v0.l| diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans-f16-true16.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans-f16-true16.ll index 3768634c1691c..d65398171840c 100644 --- a/llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans-f16-true16.ll +++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans-f16-true16.ll @@ -14,7 +14,7 @@ define amdgpu_kernel void @exp_f16(ptr addrspace(1) %ptr) { ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_]], %subreg.lo16, [[DEF]], %subreg.hi16 ; CHECK-NEXT: [[V_EXP_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_EXP_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_EXP_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE killed [[V_EXP_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16 ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_16 = COPY [[REG_SEQUENCE1]] ; CHECK-NEXT: GLOBAL_STORE_SHORT_SADDR_t16 [[V_MOV_B32_e32_]], killed [[COPY1]], [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store (s16) into %ir.ptr.load, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 @@ -37,7 +37,7 @@ define amdgpu_kernel void @log_f16(ptr addrspace(1) %ptr) { ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_]], %subreg.lo16, [[DEF]], %subreg.hi16 ; CHECK-NEXT: [[V_LOG_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_LOG_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_LOG_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE killed [[V_LOG_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[REG_SEQUENCE1]] ; CHECK-NEXT: GLOBAL_STORE_SHORT_SADDR_t16 [[V_MOV_B32_e32_]], killed [[COPY1]], [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store (s16) into %ir.ptr.load, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 @@ -60,7 +60,7 @@ define amdgpu_kernel void @rcp_f16(ptr addrspace(1) %ptr) { ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_]], %subreg.lo16, [[DEF]], %subreg.hi16 ; CHECK-NEXT: [[V_RCP_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_RCP_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_RCP_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE killed [[V_RCP_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[REG_SEQUENCE1]] ; CHECK-NEXT: GLOBAL_STORE_SHORT_SADDR_t16 
[[V_MOV_B32_e32_]], killed [[COPY1]], [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store (s16) into %ir.ptr.load, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 @@ -83,7 +83,7 @@ define amdgpu_kernel void @rsq_f16(ptr addrspace(1) %ptr) { ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_]], %subreg.lo16, [[DEF]], %subreg.hi16 ; CHECK-NEXT: [[V_RSQ_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_RSQ_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_RSQ_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE killed [[V_RSQ_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[REG_SEQUENCE1]] ; CHECK-NEXT: GLOBAL_STORE_SHORT_SADDR_t16 [[V_MOV_B32_e32_]], killed [[COPY1]], [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store (s16) into %ir.ptr.load, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 @@ -106,7 +106,7 @@ define amdgpu_kernel void @sqrt_f16(ptr addrspace(1) %ptr) { ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_]], %subreg.lo16, [[DEF]], %subreg.hi16 ; CHECK-NEXT: [[V_SQRT_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_SQRT_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_SQRT_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16 + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE killed [[V_SQRT_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[REG_SEQUENCE1]] ; CHECK-NEXT: GLOBAL_STORE_SHORT_SADDR_t16 [[V_MOV_B32_e32_]], killed [[COPY1]], [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store 
(s16) into %ir.ptr.load, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0