diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 72ccf0df4f330..5dacc0993fc9b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1605,6 +1605,80 @@ bool AMDGPUInstructionSelector::selectG_PTR_MASK(MachineInstr &I) const {
   return true;
 }
 
+bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
+    MachineInstr &MI) const {
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  Register IdxReg = MI.getOperand(2).getReg();
+
+  LLT DstTy = MRI->getType(DstReg);
+  LLT SrcTy = MRI->getType(SrcReg);
+
+  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
+  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
+  const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
+
+  // The index must be scalar. If it wasn't, RegBankSelect should have moved
+  // this into a waterfall loop.
+  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
+    return false;
+
+  const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB,
+                                                                  *MRI);
+  const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(DstTy, *DstRB,
+                                                                  *MRI);
+  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
+      !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
+      !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
+    return false;
+
+  MachineBasicBlock *BB = MI.getParent();
+  const DebugLoc &DL = MI.getDebugLoc();
+  const bool Is64 = DstTy.getSizeInBits() == 64;
+
+  unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
+
+  if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
+    if (DstTy.getSizeInBits() != 32 && !Is64)
+      return false;
+
+    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+      .addReg(IdxReg);
+
+    unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
+    BuildMI(*BB, &MI, DL, TII.get(Opc), DstReg)
+      .addReg(SrcReg, 0, SubReg)
+      .addReg(SrcReg, RegState::Implicit);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  if (SrcRB->getID() != AMDGPU::VGPRRegBankID || DstTy.getSizeInBits() != 32)
+    return false;
+
+  if (!STI.useVGPRIndexMode()) {
+    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+      .addReg(IdxReg);
+    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
+      .addReg(SrcReg, RegState::Undef, SubReg)
+      .addReg(SrcReg, RegState::Implicit);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_ON))
+    .addReg(IdxReg)
+    .addImm(AMDGPU::VGPRIndexMode::SRC0_ENABLE);
+  BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), DstReg)
+    .addReg(SrcReg, RegState::Undef, SubReg)
+    .addReg(SrcReg, RegState::Implicit)
+    .addReg(AMDGPU::M0, RegState::Implicit);
+  BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_OFF));
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AMDGPUInstructionSelector::select(MachineInstr &I) {
   if (I.isPHI())
     return selectPHI(I);
@@ -1693,6 +1767,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
     return selectG_FRAME_INDEX(I);
   case TargetOpcode::G_PTR_MASK:
     return selectG_PTR_MASK(I);
+  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+    return selectG_EXTRACT_VECTOR_ELT(I);
   default:
     return selectImpl(I, *CoverageInfo);
   }
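Note: the selector above splits a dynamic extract with a uniform index into three cases: an SGPR source uses S_MOVRELS_B32/B64 relative to the tuple's sub0 (or sub0_sub1 for 64-bit results), a VGPR source uses V_MOVRELS_B32 through m0, and, when the subtarget enables it, VGPR indexing mode wraps a plain V_MOV_B32 in S_SET_GPR_IDX_ON/OFF. A minimal standalone sketch of that decision table (a hypothetical helper for illustration, not part of the patch):

    // Hypothetical illustration of the opcode/sub-register choice made by
    // selectG_EXTRACT_VECTOR_ELT above; the names mirror the patch.
    #include <cassert>

    enum class Bank { SGPR, VGPR };

    struct Selection {
      const char *Opcode; // instruction doing the indexed read
      const char *SubReg; // sub-register of the source tuple it starts from
    };

    Selection pickExtract(Bank SrcBank, unsigned EltBits, bool UseGPRIdxMode) {
      if (SrcBank == Bank::SGPR) {
        assert(EltBits == 32 || EltBits == 64);
        return EltBits == 64 ? Selection{"S_MOVRELS_B64", "sub0_sub1"}
                             : Selection{"S_MOVRELS_B32", "sub0"};
      }
      // The VGPR path in the patch only handles 32-bit elements.
      assert(EltBits == 32);
      return UseGPRIdxMode ? Selection{"V_MOV_B32 (gpr_idx)", "sub0"}
                           : Selection{"V_MOVRELS_B32", "sub0"};
    }

The implicit use of the full SrcReg on each of these instructions keeps the whole register tuple alive, since the read is relative to its first element.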
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 633c4d35137ee..d884afbe77077 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -116,6 +116,7 @@ class AMDGPUInstructionSelector : public InstructionSelector {
   bool selectG_BRCOND(MachineInstr &I) const;
   bool selectG_FRAME_INDEX(MachineInstr &I) const;
   bool selectG_PTR_MASK(MachineInstr &I) const;
+  bool selectG_EXTRACT_VECTOR_ELT(MachineInstr &I) const;
 
   std::pair<Register, unsigned> selectVOP3ModsImpl(Register Src) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 46aea16a2bea0..16bde062b1d02 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -45,6 +45,11 @@ static cl::opt<bool> DisablePowerSched(
   cl::desc("Disable scheduling to minimize mAI power bursts"),
   cl::init(false));
 
+static cl::opt<bool> EnableVGPRIndexMode(
+  "amdgpu-vgpr-index-mode",
+  cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
+  cl::init(false));
+
 GCNSubtarget::~GCNSubtarget() = default;
 
 R600Subtarget &
@@ -561,6 +566,10 @@ bool GCNSubtarget::hasMadF16() const {
   return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16) != -1;
 }
 
+bool GCNSubtarget::useVGPRIndexMode() const {
+  return !hasMovrel() || (EnableVGPRIndexMode && hasVGPRIndexMode());
+}
+
 unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
   if (getGeneration() >= AMDGPUSubtarget::GFX10)
     return getMaxWavesPerEU();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 08878d87fb096..b0188b003c7f5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -941,9 +941,7 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
     return HasVGPRIndexMode;
   }
 
-  bool useVGPRIndexMode(bool UserEnable) const {
-    return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
-  }
+  bool useVGPRIndexMode() const;
 
   bool hasScalarCompareEq64() const {
     return getGeneration() >= VOLCANIC_ISLANDS;
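Note: the subtarget now owns the movrel-versus-index-mode policy, so both the SelectionDAG path and the new GlobalISel selector make the same choice: GPR indexing is used when the target lacks movrel entirely, or when the target supports index mode and the -amdgpu-vgpr-index-mode flag requests it. A minimal sketch of that decision (illustrative only; the parameters stand in for the subtarget features and the cl::opt):

    // Illustrative model of GCNSubtarget::useVGPRIndexMode() from the patch.
    bool useVGPRIndexMode(bool HasMovrel, bool HasVGPRIndexMode,
                          bool FlagRequested) {
      if (!HasMovrel)
        return true; // no movrel on this target: index mode is the only option
      return FlagRequested && HasVGPRIndexMode;
    }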
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 7949596187053..e73d87cd66afa 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -90,11 +90,6 @@ using namespace llvm;
 
 STATISTIC(NumTailCalls, "Number of tail calls");
 
-static cl::opt<bool> EnableVGPRIndexMode(
-  "amdgpu-vgpr-index-mode",
-  cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
-  cl::init(false));
-
 static cl::opt<bool> DisableLoopAlignment(
   "amdgpu-disable-loop-alignment",
   cl::desc("Do not align and prefetch loops"),
@@ -3415,7 +3410,7 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
   std::tie(SubReg, Offset)
     = computeIndirectRegAndOffset(TRI, VecRC, SrcReg, Offset);
 
-  bool UseGPRIdxMode = ST.useVGPRIndexMode(EnableVGPRIndexMode);
+  const bool UseGPRIdxMode = ST.useVGPRIndexMode();
 
   if (setM0ToIndexFromSGPR(TII, MRI, MI, Offset, UseGPRIdxMode, true)) {
     MachineBasicBlock::iterator I(&MI);
@@ -3510,7 +3505,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
   std::tie(SubReg, Offset)
     = computeIndirectRegAndOffset(TRI, VecRC, SrcVec->getReg(), Offset);
 
-  bool UseGPRIdxMode = ST.useVGPRIndexMode(EnableVGPRIndexMode);
+  const bool UseGPRIdxMode = ST.useVGPRIndexMode();
 
   if (Idx->getReg() == AMDGPU::NoRegister) {
     MachineBasicBlock::iterator I(&MI);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
new file mode 100644
index 0000000000000..4f9d35dd905f0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -0,0 +1,1289 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GPRIDX %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=MOVREL %s
+
+define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f32_const_s_v:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GPRIDX-NEXT:    s_mov_b32 s4, 1.0
+; GPRIDX-NEXT:    s_mov_b32 s5, 2.0
+; GPRIDX-NEXT:    s_mov_b32 s6, 0x40400000
+; GPRIDX-NEXT:    s_mov_b32 s7, 4.0
+; GPRIDX-NEXT:    s_mov_b32 s8, 0x40a00000
+; GPRIDX-NEXT:    s_mov_b32 s9, 0x40c00000
+; GPRIDX-NEXT:    s_mov_b32 s10, 0x40e00000
+; GPRIDX-NEXT:    s_mov_b32 s11, 0x41000000
+; GPRIDX-NEXT:    s_mov_b64 s[12:13], exec
+; GPRIDX-NEXT:  BB0_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT:    v_readfirstlane_b32 s14, v0
+; GPRIDX-NEXT:    s_mov_b32 m0, s14
+; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, s14, v0
+; GPRIDX-NEXT:    s_movrels_b32 s14, s4
+; GPRIDX-NEXT:    v_mov_b32_e32 v1, s14
+; GPRIDX-NEXT:    s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT:    s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT:    s_cbranch_execnz BB0_1
+; GPRIDX-NEXT:  ; %bb.2:
+; GPRIDX-NEXT:    s_mov_b64 exec, s[12:13]
+; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
+; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
+;
+; MOVREL-LABEL: dyn_extract_v8f32_const_s_v:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MOVREL-NEXT:    s_mov_b32 s4, 1.0
+; MOVREL-NEXT:    s_mov_b32 s5, 2.0
+; MOVREL-NEXT:    s_mov_b32 s6, 0x40400000
+; MOVREL-NEXT:    s_mov_b32 s7, 4.0
+; MOVREL-NEXT:    s_mov_b32 s8, 0x40a00000
+; MOVREL-NEXT:    s_mov_b32 s9, 0x40c00000
+; MOVREL-NEXT:    s_mov_b32 s10, 0x40e00000
+; MOVREL-NEXT:    s_mov_b32 s11, 0x41000000
+; MOVREL-NEXT:    s_mov_b64 s[12:13], exec
+; MOVREL-NEXT:  BB0_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT:    v_readfirstlane_b32 s14, v0
+; MOVREL-NEXT:    s_mov_b32 m0, s14
+; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, s14, v0
+; MOVREL-NEXT:    s_movrels_b32 s14, s4
+; MOVREL-NEXT:    v_mov_b32_e32 v1, s14
+; MOVREL-NEXT:    s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT:    s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT:    s_cbranch_execnz BB0_1
+; MOVREL-NEXT:  ; %bb.2:
+; MOVREL-NEXT:    s_mov_b64 exec, s[12:13]
+; MOVREL-NEXT:    v_mov_b32_e32 v0, v1
+; MOVREL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
+  ret float %ext
+}
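Note: both check prefixes above show the waterfall loop emitted when the index is divergent. v_readfirstlane picks the index held by the first active lane, the extract is done once for that now-uniform value, and every lane whose index matched is retired from exec until none remain. A small C++ model of that control flow over a simulated wave (purely illustrative; the wave size and types are assumptions of the sketch):

    #include <array>
    #include <cstdint>

    // Toy model of the waterfall loop in the checks above: each iteration
    // services every lane whose index equals the first active lane's index.
    constexpr int WaveSize = 64;

    void waterfallExtract(const std::array<float, 8> &vec,
                          const std::array<uint32_t, WaveSize> &idx,
                          std::array<float, WaveSize> &out) {
      uint64_t exec = ~0ull; // all lanes active
      while (exec != 0) {
        int first = __builtin_ctzll(exec);  // v_readfirstlane
        uint32_t uniformIdx = idx[first];   // uniform for this iteration
        for (int lane = 0; lane < WaveSize; ++lane) {
          if (((exec >> lane) & 1) && idx[lane] == uniformIdx) {
            out[lane] = vec[uniformIdx & 7]; // one s_movrels per iteration;
                                             // masked to keep the model in bounds
            exec &= ~(1ull << lane);         // retire matching lanes
          }
        }
      }
    }

In the worst case the loop runs once per distinct index value present in the wave, which is why RegBankSelect only falls back to it when the index is not already scalar.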
+define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f32_const_s_s:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_mov_b32 s4, 1.0
+; GPRIDX-NEXT:    s_mov_b32 m0, s2
+; GPRIDX-NEXT:    s_mov_b32 s5, 2.0
+; GPRIDX-NEXT:    s_mov_b32 s6, 0x40400000
+; GPRIDX-NEXT:    s_mov_b32 s7, 4.0
+; GPRIDX-NEXT:    s_mov_b32 s8, 0x40a00000
+; GPRIDX-NEXT:    s_mov_b32 s9, 0x40c00000
+; GPRIDX-NEXT:    s_mov_b32 s10, 0x40e00000
+; GPRIDX-NEXT:    s_mov_b32 s11, 0x41000000
+; GPRIDX-NEXT:    s_movrels_b32 s0, s4
+; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
+; GPRIDX-NEXT:    ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f32_const_s_s:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_mov_b32 s4, 1.0
+; MOVREL-NEXT:    s_mov_b32 m0, s2
+; MOVREL-NEXT:    s_mov_b32 s5, 2.0
+; MOVREL-NEXT:    s_mov_b32 s6, 0x40400000
+; MOVREL-NEXT:    s_mov_b32 s7, 4.0
+; MOVREL-NEXT:    s_mov_b32 s8, 0x40a00000
+; MOVREL-NEXT:    s_mov_b32 s9, 0x40c00000
+; MOVREL-NEXT:    s_mov_b32 s10, 0x40e00000
+; MOVREL-NEXT:    s_mov_b32 s11, 0x41000000
+; MOVREL-NEXT:    s_movrels_b32 s0, s4
+; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
+; MOVREL-NEXT:    ; return to shader part epilog
+entry:
+  %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel
+  ret float %ext
+}
+
+define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f32_s_v:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_mov_b32 s0, s2
+; GPRIDX-NEXT:    s_mov_b32 s1, s3
+; GPRIDX-NEXT:    s_mov_b32 s2, s4
+; GPRIDX-NEXT:    s_mov_b32 s3, s5
+; GPRIDX-NEXT:    s_mov_b32 s4, s6
+; GPRIDX-NEXT:    s_mov_b32 s5, s7
+; GPRIDX-NEXT:    s_mov_b32 s6, s8
+; GPRIDX-NEXT:    s_mov_b32 s7, s9
+; GPRIDX-NEXT:    s_mov_b64 s[8:9], exec
+; GPRIDX-NEXT:  BB2_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT:    v_readfirstlane_b32 s10, v0
+; GPRIDX-NEXT:    s_mov_b32 m0, s10
+; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, s10, v0
+; GPRIDX-NEXT:    s_movrels_b32 s10, s0
+; GPRIDX-NEXT:    v_mov_b32_e32 v1, s10
+; GPRIDX-NEXT:    s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT:    s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT:    s_cbranch_execnz BB2_1
+; GPRIDX-NEXT:  ; %bb.2:
+; GPRIDX-NEXT:    s_mov_b64 exec, s[8:9]
+; GPRIDX-NEXT:    v_mov_b32_e32 v0, v1
+; GPRIDX-NEXT:    ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f32_s_v:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_mov_b32 s0, s2
+; MOVREL-NEXT:    s_mov_b32 s1, s3
+; MOVREL-NEXT:    s_mov_b32 s2, s4
+; MOVREL-NEXT:    s_mov_b32 s3, s5
+; MOVREL-NEXT:    s_mov_b32 s4, s6
+; MOVREL-NEXT:    s_mov_b32 s5, s7
+; MOVREL-NEXT:    s_mov_b32 s6, s8
+; MOVREL-NEXT:    s_mov_b32 s7, s9
+; MOVREL-NEXT:    s_mov_b64 s[8:9], exec
+; MOVREL-NEXT:  BB2_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT:    v_readfirstlane_b32 s10, v0
+; MOVREL-NEXT:    s_mov_b32 m0, s10
+; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, s10, v0
+; MOVREL-NEXT:    s_movrels_b32 s10, s0
+; MOVREL-NEXT:    v_mov_b32_e32 v1, s10
+; MOVREL-NEXT:    s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT:    s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT:    s_cbranch_execnz BB2_1
+; MOVREL-NEXT:  ; %bb.2:
+; MOVREL-NEXT:    s_mov_b64 exec, s[8:9]
+; MOVREL-NEXT:    v_mov_b32_e32 v0, v1
+; MOVREL-NEXT:    ; return to shader part epilog
+entry:
+  %ext = extractelement <8 x float> %vec, i32 %sel
+  ret float %ext
+}
+
+define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f32_v_v:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GPRIDX-NEXT:    s_mov_b64 s[4:5], exec
+; GPRIDX-NEXT:  BB3_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v8
+; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v8
+; GPRIDX-NEXT:    s_set_gpr_idx_on s6, gpr_idx(SRC0)
+; GPRIDX-NEXT:    v_mov_b32_e32 v9, v0
+; GPRIDX-NEXT:    s_set_gpr_idx_off
+; GPRIDX-NEXT:    s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT:    s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT:    s_cbranch_execnz BB3_1
+; GPRIDX-NEXT:  ; %bb.2:
+; GPRIDX-NEXT:    s_mov_b64 exec, s[4:5]
+; GPRIDX-NEXT:    v_mov_b32_e32 v0, v9
+; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
+;
+; MOVREL-LABEL: dyn_extract_v8f32_v_v:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MOVREL-NEXT:    s_mov_b64 s[4:5], exec
+; MOVREL-NEXT:  BB3_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT:    v_readfirstlane_b32 s6, v8
+; MOVREL-NEXT:    s_mov_b32 m0, s6
+; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v8
+; MOVREL-NEXT:    v_movrels_b32_e32 v9, v0
+; MOVREL-NEXT:    s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT:    s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT:    s_cbranch_execnz BB3_1
+; MOVREL-NEXT:  ; %bb.2:
+; MOVREL-NEXT:    s_mov_b64 exec, s[4:5]
+; MOVREL-NEXT:    v_mov_b32_e32 v0, v9
+; MOVREL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %ext = extractelement <8 x float> %vec, i32 %sel
+  ret float %ext
+}
+
+define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f32_v_s:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_set_gpr_idx_on s2, gpr_idx(SRC0)
+; GPRIDX-NEXT:    v_mov_b32_e32 v0, v0
+; GPRIDX-NEXT:    s_set_gpr_idx_off
+; GPRIDX-NEXT:    ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f32_v_s:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_mov_b32 m0, s2
+; MOVREL-NEXT:    v_movrels_b32_e32 v0, v0
+; MOVREL-NEXT:    ; return to shader part epilog
+entry:
+  %ext = extractelement <8 x float> %vec, i32 %sel
+  ret float %ext
+}
+
+define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f32_s_s:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_mov_b32 s0, s2
+; GPRIDX-NEXT:    s_mov_b32 m0, s10
+; GPRIDX-NEXT:    s_mov_b32 s1, s3
+; GPRIDX-NEXT:    s_mov_b32 s2, s4
+; GPRIDX-NEXT:    s_mov_b32 s3, s5
+; GPRIDX-NEXT:    s_mov_b32 s4, s6
+; GPRIDX-NEXT:    s_mov_b32 s5, s7
+; GPRIDX-NEXT:    s_mov_b32 s6, s8
+; GPRIDX-NEXT:    s_mov_b32 s7, s9
+; GPRIDX-NEXT:    s_movrels_b32 s0, s0
+; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
+; GPRIDX-NEXT:    ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f32_s_s:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_mov_b32 s0, s2
+; MOVREL-NEXT:    s_mov_b32 m0, s10
+; MOVREL-NEXT:    s_mov_b32 s1, s3
+; MOVREL-NEXT:    s_mov_b32 s2, s4
+; MOVREL-NEXT:    s_mov_b32 s3, s5
+; MOVREL-NEXT:    s_mov_b32 s4, s6
+; MOVREL-NEXT:    s_mov_b32 s5, s7
+; MOVREL-NEXT:    s_mov_b32 s6, s8
+; MOVREL-NEXT:    s_mov_b32 s7, s9
+; MOVREL-NEXT:    s_movrels_b32 s0, s0
+; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
+; MOVREL-NEXT:    ; return to shader part epilog
+entry:
+  %ext = extractelement <8 x float> %vec, i32 %sel
+  ret float %ext
+}
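Note: the uniform-index cases above compile down to a single relative move. s_movrels_b32 and v_movrels_b32 read the register at src + M0, and on the GPRIDX prefix s_set_gpr_idx_on folds the same offset into the source addressing of a plain v_mov_b32. A one-function C++ model of the relative read (illustrative only):

    #include <cstdint>
    #include <vector>

    // Toy model of s_movrels_b32 / v_movrels_b32: the source operand names the
    // tuple's base register (its sub0) and the hardware adds M0 to it.
    uint32_t movrels(const std::vector<uint32_t> &regfile, unsigned srcBase,
                     unsigned m0) {
      return regfile[srcBase + m0]; // relative read; hardware does no bounds check
    }

This is also why the selected machine instructions carry an implicit use of the whole source tuple: only sub0 is named, but any element of the tuple may actually be read.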
+define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
+; GPRIDX-LABEL: dyn_extract_v8i64_const_s_v:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GPRIDX-NEXT:    s_mov_b64 s[4:5], 1
+; GPRIDX-NEXT:    s_mov_b64 s[6:7], 2
+; GPRIDX-NEXT:    s_mov_b64 s[8:9], 3
+; GPRIDX-NEXT:    s_mov_b64 s[10:11], 4
+; GPRIDX-NEXT:    s_mov_b64 s[12:13], 5
+; GPRIDX-NEXT:    s_mov_b64 s[14:15], 6
+; GPRIDX-NEXT:    s_mov_b64 s[16:17], 7
+; GPRIDX-NEXT:    s_mov_b64 s[18:19], 8
+; GPRIDX-NEXT:    s_mov_b64 s[20:21], exec
+; GPRIDX-NEXT:  BB6_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT:    v_readfirstlane_b32 s22, v0
+; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, s22, v0
+; GPRIDX-NEXT:    s_lshl_b32 s22, s22, 1
+; GPRIDX-NEXT:    s_add_u32 s23, s22, 1
+; GPRIDX-NEXT:    s_mov_b32 m0, s22
+; GPRIDX-NEXT:    s_nop 0
+; GPRIDX-NEXT:    s_movrels_b32 s22, s4
+; GPRIDX-NEXT:    s_mov_b32 m0, s23
+; GPRIDX-NEXT:    s_nop 0
+; GPRIDX-NEXT:    s_movrels_b32 s23, s4
+; GPRIDX-NEXT:    s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT:    s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT:    s_cbranch_execnz BB6_1
+; GPRIDX-NEXT:  ; %bb.2:
+; GPRIDX-NEXT:    s_mov_b64 exec, s[20:21]
+; GPRIDX-NEXT:    v_mov_b32_e32 v0, s22
+; GPRIDX-NEXT:    v_mov_b32_e32 v1, s23
+; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
+;
+; MOVREL-LABEL: dyn_extract_v8i64_const_s_v:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MOVREL-NEXT:    s_mov_b64 s[4:5], 1
+; MOVREL-NEXT:    s_mov_b64 s[6:7], 2
+; MOVREL-NEXT:    s_mov_b64 s[8:9], 3
+; MOVREL-NEXT:    s_mov_b64 s[10:11], 4
+; MOVREL-NEXT:    s_mov_b64 s[12:13], 5
+; MOVREL-NEXT:    s_mov_b64 s[14:15], 6
+; MOVREL-NEXT:    s_mov_b64 s[16:17], 7
+; MOVREL-NEXT:    s_mov_b64 s[18:19], 8
+; MOVREL-NEXT:    s_mov_b64 s[20:21], exec
+; MOVREL-NEXT:  BB6_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT:    v_readfirstlane_b32 s22, v0
+; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, s22, v0
+; MOVREL-NEXT:    s_lshl_b32 s22, s22, 1
+; MOVREL-NEXT:    s_add_u32 s23, s22, 1
+; MOVREL-NEXT:    s_mov_b32 m0, s22
+; MOVREL-NEXT:    s_movrels_b32 s22, s4
+; MOVREL-NEXT:    s_mov_b32 m0, s23
+; MOVREL-NEXT:    s_movrels_b32 s23, s4
+; MOVREL-NEXT:    s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT:    s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT:    s_cbranch_execnz BB6_1
+; MOVREL-NEXT:  ; %bb.2:
+; MOVREL-NEXT:    s_mov_b64 exec, s[20:21]
+; MOVREL-NEXT:    v_mov_b32_e32 v0, s22
+; MOVREL-NEXT:    v_mov_b32_e32 v1, s23
+; MOVREL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
+  ret i64 %ext
+}
+
+define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8i64_const_s_s:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_mov_b64 s[4:5], 1
+; GPRIDX-NEXT:    s_mov_b32 m0, s2
+; GPRIDX-NEXT:    s_mov_b64 s[6:7], 2
+; GPRIDX-NEXT:    s_mov_b64 s[8:9], 3
+; GPRIDX-NEXT:    s_mov_b64 s[10:11], 4
+; GPRIDX-NEXT:    s_mov_b64 s[12:13], 5
+; GPRIDX-NEXT:    s_mov_b64 s[14:15], 6
+; GPRIDX-NEXT:    s_mov_b64 s[16:17], 7
+; GPRIDX-NEXT:    s_mov_b64 s[18:19], 8
+; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[4:5]
+; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
+; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
+; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
+; GPRIDX-NEXT:    s_endpgm
+;
+; MOVREL-LABEL: dyn_extract_v8i64_const_s_s:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_mov_b64 s[4:5], 1
+; MOVREL-NEXT:    s_mov_b32 m0, s2
+; MOVREL-NEXT:    s_mov_b64 s[6:7], 2
+; MOVREL-NEXT:    s_mov_b64 s[8:9], 3
+; MOVREL-NEXT:    s_mov_b64 s[10:11], 4
+; MOVREL-NEXT:    s_mov_b64 s[12:13], 5
+; MOVREL-NEXT:    s_mov_b64 s[14:15], 6
+; MOVREL-NEXT:    s_mov_b64 s[16:17], 7
+; MOVREL-NEXT:    s_mov_b64 s[18:19], 8
+; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[4:5]
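Note: 64-bit elements are not always extracted in one relative move. When the result has to be materialized 32 bits at a time, as in the waterfall loop above, the extract becomes two dword reads at indices 2*i and 2*i+1, which is the s_lshl_b32/s_add_u32 pair feeding m0 in the checks; the fully uniform SGPR case still uses a single s_movrels_b64. A sketch of the dword-index computation, mirroring those instructions (illustrative only):

    #include <utility>

    // The two dword indices used for a 64-bit element extract, matching the
    // "s_lshl_b32 s, s, 1" / "s_add_u32 s+1" sequence in the checks above.
    std::pair<unsigned, unsigned> dwordIndicesFor64BitElt(unsigned eltIdx) {
      unsigned lo = eltIdx << 1; // s_lshl_b32: dword index of the low half
      return {lo, lo + 1};       // s_add_u32:  dword index of the high half
    }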
+; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
+; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
+; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
+; MOVREL-NEXT:    s_endpgm
+entry:
+  %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel
+  store i64 %ext, i64 addrspace(1)* undef
+  ret void
+}
+
+define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) {
+; GPRIDX-LABEL: dyn_extract_v8i64_s_v:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_mov_b32 s0, s2
+; GPRIDX-NEXT:    s_mov_b32 s1, s3
+; GPRIDX-NEXT:    s_mov_b32 s2, s4
+; GPRIDX-NEXT:    s_mov_b32 s3, s5
+; GPRIDX-NEXT:    s_mov_b32 s4, s6
+; GPRIDX-NEXT:    s_mov_b32 s5, s7
+; GPRIDX-NEXT:    s_mov_b32 s6, s8
+; GPRIDX-NEXT:    s_mov_b32 s7, s9
+; GPRIDX-NEXT:    s_mov_b32 s8, s10
+; GPRIDX-NEXT:    s_mov_b32 s9, s11
+; GPRIDX-NEXT:    s_mov_b32 s10, s12
+; GPRIDX-NEXT:    s_mov_b32 s11, s13
+; GPRIDX-NEXT:    s_mov_b32 s12, s14
+; GPRIDX-NEXT:    s_mov_b32 s13, s15
+; GPRIDX-NEXT:    s_mov_b32 s14, s16
+; GPRIDX-NEXT:    s_mov_b32 s15, s17
+; GPRIDX-NEXT:    s_mov_b64 s[16:17], exec
+; GPRIDX-NEXT:  BB8_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT:    v_readfirstlane_b32 s18, v0
+; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, s18, v0
+; GPRIDX-NEXT:    s_lshl_b32 s18, s18, 1
+; GPRIDX-NEXT:    s_add_u32 s19, s18, 1
+; GPRIDX-NEXT:    s_mov_b32 m0, s18
+; GPRIDX-NEXT:    s_nop 0
+; GPRIDX-NEXT:    s_movrels_b32 s18, s0
+; GPRIDX-NEXT:    s_mov_b32 m0, s19
+; GPRIDX-NEXT:    v_mov_b32_e32 v1, s18
+; GPRIDX-NEXT:    s_movrels_b32 s19, s0
+; GPRIDX-NEXT:    v_mov_b32_e32 v2, s19
+; GPRIDX-NEXT:    s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT:    s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT:    s_cbranch_execnz BB8_1
+; GPRIDX-NEXT:  ; %bb.2:
+; GPRIDX-NEXT:    s_mov_b64 exec, s[16:17]
+; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[1:2], off
+; GPRIDX-NEXT:    s_endpgm
+;
+; MOVREL-LABEL: dyn_extract_v8i64_s_v:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_mov_b32 s0, s2
+; MOVREL-NEXT:    s_mov_b32 s1, s3
+; MOVREL-NEXT:    s_mov_b32 s2, s4
+; MOVREL-NEXT:    s_mov_b32 s3, s5
+; MOVREL-NEXT:    s_mov_b32 s4, s6
+; MOVREL-NEXT:    s_mov_b32 s5, s7
+; MOVREL-NEXT:    s_mov_b32 s6, s8
+; MOVREL-NEXT:    s_mov_b32 s7, s9
+; MOVREL-NEXT:    s_mov_b32 s8, s10
+; MOVREL-NEXT:    s_mov_b32 s9, s11
+; MOVREL-NEXT:    s_mov_b32 s10, s12
+; MOVREL-NEXT:    s_mov_b32 s11, s13
+; MOVREL-NEXT:    s_mov_b32 s12, s14
+; MOVREL-NEXT:    s_mov_b32 s13, s15
+; MOVREL-NEXT:    s_mov_b32 s14, s16
+; MOVREL-NEXT:    s_mov_b32 s15, s17
+; MOVREL-NEXT:    s_mov_b64 s[16:17], exec
+; MOVREL-NEXT:  BB8_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT:    v_readfirstlane_b32 s18, v0
+; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, s18, v0
+; MOVREL-NEXT:    s_lshl_b32 s18, s18, 1
+; MOVREL-NEXT:    s_add_u32 s19, s18, 1
+; MOVREL-NEXT:    s_mov_b32 m0, s18
+; MOVREL-NEXT:    s_movrels_b32 s18, s0
+; MOVREL-NEXT:    s_mov_b32 m0, s19
+; MOVREL-NEXT:    s_movrels_b32 s19, s0
+; MOVREL-NEXT:    v_mov_b32_e32 v1, s18
+; MOVREL-NEXT:    v_mov_b32_e32 v2, s19
+; MOVREL-NEXT:    s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT:    s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT:    s_cbranch_execnz BB8_1
+; MOVREL-NEXT:  ; %bb.2:
+; MOVREL-NEXT:    s_mov_b64 exec, s[16:17]
+; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[1:2]
+; MOVREL-NEXT:    s_endpgm
+entry:
+  %ext = extractelement <8 x i64> %vec, i32 %sel
+  store i64 %ext, i64 addrspace(1)* undef
+  ret void
+}
+define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) {
+; GPRIDX-LABEL: dyn_extract_v8i64_v_v:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GPRIDX-NEXT:    s_mov_b64 s[4:5], exec
+; GPRIDX-NEXT:  BB9_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v16
+; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v16
+; GPRIDX-NEXT:    s_lshl_b32 s6, s6, 1
+; GPRIDX-NEXT:    s_add_u32 s7, s6, 1
+; GPRIDX-NEXT:    s_set_gpr_idx_on s6, gpr_idx(SRC0)
+; GPRIDX-NEXT:    v_mov_b32_e32 v17, v0
+; GPRIDX-NEXT:    s_set_gpr_idx_off
+; GPRIDX-NEXT:    s_set_gpr_idx_on s7, gpr_idx(SRC0)
+; GPRIDX-NEXT:    v_mov_b32_e32 v18, v0
+; GPRIDX-NEXT:    s_set_gpr_idx_off
+; GPRIDX-NEXT:    s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT:    s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT:    s_cbranch_execnz BB9_1
+; GPRIDX-NEXT:  ; %bb.2:
+; GPRIDX-NEXT:    s_mov_b64 exec, s[4:5]
+; GPRIDX-NEXT:    v_mov_b32_e32 v0, v17
+; GPRIDX-NEXT:    v_mov_b32_e32 v1, v18
+; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
+;
+; MOVREL-LABEL: dyn_extract_v8i64_v_v:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MOVREL-NEXT:    s_mov_b64 s[4:5], exec
+; MOVREL-NEXT:  BB9_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT:    v_readfirstlane_b32 s6, v16
+; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v16
+; MOVREL-NEXT:    s_lshl_b32 s6, s6, 1
+; MOVREL-NEXT:    s_mov_b32 m0, s6
+; MOVREL-NEXT:    s_add_u32 s7, s6, 1
+; MOVREL-NEXT:    v_movrels_b32_e32 v17, v0
+; MOVREL-NEXT:    s_mov_b32 m0, s7
+; MOVREL-NEXT:    v_movrels_b32_e32 v18, v0
+; MOVREL-NEXT:    s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT:    s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT:    s_cbranch_execnz BB9_1
+; MOVREL-NEXT:  ; %bb.2:
+; MOVREL-NEXT:    s_mov_b64 exec, s[4:5]
+; MOVREL-NEXT:    v_mov_b32_e32 v0, v17
+; MOVREL-NEXT:    v_mov_b32_e32 v1, v18
+; MOVREL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %ext = extractelement <8 x i64> %vec, i32 %sel
+  ret i64 %ext
+}
+
+define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8i64_v_s:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_lshl_b32 s0, s2, 1
+; GPRIDX-NEXT:    s_add_u32 s1, s0, 1
+; GPRIDX-NEXT:    s_set_gpr_idx_on s0, gpr_idx(SRC0)
+; GPRIDX-NEXT:    v_mov_b32_e32 v16, v0
+; GPRIDX-NEXT:    s_set_gpr_idx_off
+; GPRIDX-NEXT:    s_set_gpr_idx_on s1, gpr_idx(SRC0)
+; GPRIDX-NEXT:    v_mov_b32_e32 v17, v0
+; GPRIDX-NEXT:    s_set_gpr_idx_off
+; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[16:17], off
+; GPRIDX-NEXT:    s_endpgm
+;
+; MOVREL-LABEL: dyn_extract_v8i64_v_s:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_lshl_b32 s0, s2, 1
+; MOVREL-NEXT:    s_mov_b32 m0, s0
+; MOVREL-NEXT:    s_add_u32 s0, s0, 1
+; MOVREL-NEXT:    v_movrels_b32_e32 v16, v0
+; MOVREL-NEXT:    s_mov_b32 m0, s0
+; MOVREL-NEXT:    v_movrels_b32_e32 v17, v0
+; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[16:17]
+; MOVREL-NEXT:    s_endpgm
+entry:
+  %ext = extractelement <8 x i64> %vec, i32 %sel
+  store i64 %ext, i64 addrspace(1)* undef
+  ret void
+}
+define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8i64_s_s:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_mov_b32 s0, s2
+; GPRIDX-NEXT:    s_mov_b32 s1, s3
+; GPRIDX-NEXT:    s_mov_b32 m0, s18
+; GPRIDX-NEXT:    s_mov_b32 s2, s4
+; GPRIDX-NEXT:    s_mov_b32 s3, s5
+; GPRIDX-NEXT:    s_mov_b32 s4, s6
+; GPRIDX-NEXT:    s_mov_b32 s5, s7
+; GPRIDX-NEXT:    s_mov_b32 s6, s8
+; GPRIDX-NEXT:    s_mov_b32 s7, s9
+; GPRIDX-NEXT:    s_mov_b32 s8, s10
+; GPRIDX-NEXT:    s_mov_b32 s9, s11
+; GPRIDX-NEXT:    s_mov_b32 s10, s12
+; GPRIDX-NEXT:    s_mov_b32 s11, s13
+; GPRIDX-NEXT:    s_mov_b32 s12, s14
+; GPRIDX-NEXT:    s_mov_b32 s13, s15
+; GPRIDX-NEXT:    s_mov_b32 s14, s16
+; GPRIDX-NEXT:    s_mov_b32 s15, s17
+; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
+; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
+; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
+; GPRIDX-NEXT:    s_endpgm
+;
+; MOVREL-LABEL: dyn_extract_v8i64_s_s:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_mov_b32 s0, s2
+; MOVREL-NEXT:    s_mov_b32 s1, s3
+; MOVREL-NEXT:    s_mov_b32 m0, s18
+; MOVREL-NEXT:    s_mov_b32 s2, s4
+; MOVREL-NEXT:    s_mov_b32 s3, s5
+; MOVREL-NEXT:    s_mov_b32 s4, s6
+; MOVREL-NEXT:    s_mov_b32 s5, s7
+; MOVREL-NEXT:    s_mov_b32 s6, s8
+; MOVREL-NEXT:    s_mov_b32 s7, s9
+; MOVREL-NEXT:    s_mov_b32 s8, s10
+; MOVREL-NEXT:    s_mov_b32 s9, s11
+; MOVREL-NEXT:    s_mov_b32 s10, s12
+; MOVREL-NEXT:    s_mov_b32 s11, s13
+; MOVREL-NEXT:    s_mov_b32 s12, s14
+; MOVREL-NEXT:    s_mov_b32 s13, s15
+; MOVREL-NEXT:    s_mov_b32 s14, s16
+; MOVREL-NEXT:    s_mov_b32 s15, s17
+; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
+; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
+; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
+; MOVREL-NEXT:    s_endpgm
+entry:
+  %ext = extractelement <8 x i64> %vec, i32 %sel
+  store i64 %ext, i64 addrspace(1)* undef
+  ret void
+}
+
+define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_mov_b32 s0, s2
+; GPRIDX-NEXT:    s_add_u32 m0, s10, 3
+; GPRIDX-NEXT:    s_mov_b32 s1, s3
+; GPRIDX-NEXT:    s_mov_b32 s2, s4
+; GPRIDX-NEXT:    s_mov_b32 s3, s5
+; GPRIDX-NEXT:    s_mov_b32 s4, s6
+; GPRIDX-NEXT:    s_mov_b32 s5, s7
+; GPRIDX-NEXT:    s_mov_b32 s6, s8
+; GPRIDX-NEXT:    s_mov_b32 s7, s9
+; GPRIDX-NEXT:    s_movrels_b32 s0, s0
+; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
+; GPRIDX-NEXT:    ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_mov_b32 s0, s2
+; MOVREL-NEXT:    s_add_u32 m0, s10, 3
+; MOVREL-NEXT:    s_mov_b32 s1, s3
+; MOVREL-NEXT:    s_mov_b32 s2, s4
+; MOVREL-NEXT:    s_mov_b32 s3, s5
+; MOVREL-NEXT:    s_mov_b32 s4, s6
+; MOVREL-NEXT:    s_mov_b32 s5, s7
+; MOVREL-NEXT:    s_mov_b32 s6, s8
+; MOVREL-NEXT:    s_mov_b32 s7, s9
+; MOVREL-NEXT:    s_movrels_b32 s0, s0
+; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
+; MOVREL-NEXT:    ; return to shader part epilog
+entry:
+  %add = add i32 %sel, 3
+  %ext = extractelement <8 x float> %vec, i32 %add
+  ret float %ext
+}
+
+define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GPRIDX-NEXT:    v_add_u32_e32 v9, 3, v8
+; GPRIDX-NEXT:    s_mov_b64 s[4:5], exec
+; GPRIDX-NEXT:  BB13_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v9
+; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v9
+; GPRIDX-NEXT:    s_set_gpr_idx_on s6, gpr_idx(SRC0)
+; GPRIDX-NEXT:    v_mov_b32_e32 v8, v0
+; GPRIDX-NEXT:    s_set_gpr_idx_off
+; GPRIDX-NEXT:    s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT:    s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT:    s_cbranch_execnz BB13_1
+; GPRIDX-NEXT:  ; %bb.2:
+; GPRIDX-NEXT:    s_mov_b64 exec, s[4:5]
+; GPRIDX-NEXT:    v_mov_b32_e32 v0, v8
+; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
+;
+; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MOVREL-NEXT:    v_add_u32_e32 v9, vcc, 3, v8
+; MOVREL-NEXT:    s_mov_b64 s[4:5], exec
+; MOVREL-NEXT:  BB13_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT:    v_readfirstlane_b32 s6, v9
+; MOVREL-NEXT:    s_mov_b32 m0, s6
+; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v9
+; MOVREL-NEXT:    v_movrels_b32_e32 v8, v0
+; MOVREL-NEXT:    s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT:    s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT:    s_cbranch_execnz BB13_1
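Note: in the offset tests, the constant add from the IR never occupies a scalar register of its own. With a uniform index it folds straight into the index write, as in "s_add_u32 m0, s10, 3" above; with a divergent index the add happens once in a VGPR before the waterfall loop. A trivial sketch of the uniform fold (illustrative only):

    // Constant offsets from "extractelement ... (add i32 %sel, K)" fold into
    // the m0 write itself, one s_add_u32 instead of a mov plus an add.
    unsigned m0ForOffsetIndex(unsigned sel, int k) {
      return sel + static_cast<unsigned>(k); // emitted as s_add_u32 m0, sel, K
    }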
+; MOVREL-NEXT:  ; %bb.2:
+; MOVREL-NEXT:    s_mov_b64 exec, s[4:5]
+; MOVREL-NEXT:    v_mov_b32_e32 v0, v8
+; MOVREL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %add = add i32 %sel, 3
+  %ext = extractelement <8 x float> %vec, i32 %add
+  ret float %ext
+}
+
+define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset1:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_mov_b32 s0, s2
+; GPRIDX-NEXT:    s_mov_b32 s1, s3
+; GPRIDX-NEXT:    s_add_u32 m0, s18, 1
+; GPRIDX-NEXT:    s_mov_b32 s2, s4
+; GPRIDX-NEXT:    s_mov_b32 s3, s5
+; GPRIDX-NEXT:    s_mov_b32 s4, s6
+; GPRIDX-NEXT:    s_mov_b32 s5, s7
+; GPRIDX-NEXT:    s_mov_b32 s6, s8
+; GPRIDX-NEXT:    s_mov_b32 s7, s9
+; GPRIDX-NEXT:    s_mov_b32 s8, s10
+; GPRIDX-NEXT:    s_mov_b32 s9, s11
+; GPRIDX-NEXT:    s_mov_b32 s10, s12
+; GPRIDX-NEXT:    s_mov_b32 s11, s13
+; GPRIDX-NEXT:    s_mov_b32 s12, s14
+; GPRIDX-NEXT:    s_mov_b32 s13, s15
+; GPRIDX-NEXT:    s_mov_b32 s14, s16
+; GPRIDX-NEXT:    s_mov_b32 s15, s17
+; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT:    ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset1:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_mov_b32 s0, s2
+; MOVREL-NEXT:    s_mov_b32 s1, s3
+; MOVREL-NEXT:    s_add_u32 m0, s18, 1
+; MOVREL-NEXT:    s_mov_b32 s2, s4
+; MOVREL-NEXT:    s_mov_b32 s3, s5
+; MOVREL-NEXT:    s_mov_b32 s4, s6
+; MOVREL-NEXT:    s_mov_b32 s5, s7
+; MOVREL-NEXT:    s_mov_b32 s6, s8
+; MOVREL-NEXT:    s_mov_b32 s7, s9
+; MOVREL-NEXT:    s_mov_b32 s8, s10
+; MOVREL-NEXT:    s_mov_b32 s9, s11
+; MOVREL-NEXT:    s_mov_b32 s10, s12
+; MOVREL-NEXT:    s_mov_b32 s11, s13
+; MOVREL-NEXT:    s_mov_b32 s12, s14
+; MOVREL-NEXT:    s_mov_b32 s13, s15
+; MOVREL-NEXT:    s_mov_b32 s14, s16
+; MOVREL-NEXT:    s_mov_b32 s15, s17
+; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT:    ; return to shader part epilog
+entry:
+  %add = add i32 %sel, 1
+  %ext = extractelement <8 x double> %vec, i32 %add
+  ret double %ext
+}
+
+define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset2:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_mov_b32 s0, s2
+; GPRIDX-NEXT:    s_mov_b32 s1, s3
+; GPRIDX-NEXT:    s_add_u32 m0, s18, 2
+; GPRIDX-NEXT:    s_mov_b32 s2, s4
+; GPRIDX-NEXT:    s_mov_b32 s3, s5
+; GPRIDX-NEXT:    s_mov_b32 s4, s6
+; GPRIDX-NEXT:    s_mov_b32 s5, s7
+; GPRIDX-NEXT:    s_mov_b32 s6, s8
+; GPRIDX-NEXT:    s_mov_b32 s7, s9
+; GPRIDX-NEXT:    s_mov_b32 s8, s10
+; GPRIDX-NEXT:    s_mov_b32 s9, s11
+; GPRIDX-NEXT:    s_mov_b32 s10, s12
+; GPRIDX-NEXT:    s_mov_b32 s11, s13
+; GPRIDX-NEXT:    s_mov_b32 s12, s14
+; GPRIDX-NEXT:    s_mov_b32 s13, s15
+; GPRIDX-NEXT:    s_mov_b32 s14, s16
+; GPRIDX-NEXT:    s_mov_b32 s15, s17
+; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT:    ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset2:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_mov_b32 s0, s2
+; MOVREL-NEXT:    s_mov_b32 s1, s3
+; MOVREL-NEXT:    s_add_u32 m0, s18, 2
+; MOVREL-NEXT:    s_mov_b32 s2, s4
+; MOVREL-NEXT:    s_mov_b32 s3, s5
+; MOVREL-NEXT:    s_mov_b32 s4, s6
+; MOVREL-NEXT:    s_mov_b32 s5, s7
+; MOVREL-NEXT:    s_mov_b32 s6, s8
+; MOVREL-NEXT:    s_mov_b32 s7, s9
+; MOVREL-NEXT:    s_mov_b32 s8, s10
+; MOVREL-NEXT:    s_mov_b32 s9, s11
+; MOVREL-NEXT:    s_mov_b32 s10, s12
+; MOVREL-NEXT:    s_mov_b32 s11, s13
+; MOVREL-NEXT:    s_mov_b32 s12, s14
+; MOVREL-NEXT:    s_mov_b32 s13, s15
+; MOVREL-NEXT:    s_mov_b32 s14, s16
+; MOVREL-NEXT:    s_mov_b32 s15, s17
+; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT:    ; return to shader part epilog
+entry:
+  %add = add i32 %sel, 2
+  %ext = extractelement <8 x double> %vec, i32 %add
+  ret double %ext
+}
+
+define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset3:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_mov_b32 s0, s2
+; GPRIDX-NEXT:    s_mov_b32 s1, s3
+; GPRIDX-NEXT:    s_add_u32 m0, s18, 3
+; GPRIDX-NEXT:    s_mov_b32 s2, s4
+; GPRIDX-NEXT:    s_mov_b32 s3, s5
+; GPRIDX-NEXT:    s_mov_b32 s4, s6
+; GPRIDX-NEXT:    s_mov_b32 s5, s7
+; GPRIDX-NEXT:    s_mov_b32 s6, s8
+; GPRIDX-NEXT:    s_mov_b32 s7, s9
+; GPRIDX-NEXT:    s_mov_b32 s8, s10
+; GPRIDX-NEXT:    s_mov_b32 s9, s11
+; GPRIDX-NEXT:    s_mov_b32 s10, s12
+; GPRIDX-NEXT:    s_mov_b32 s11, s13
+; GPRIDX-NEXT:    s_mov_b32 s12, s14
+; GPRIDX-NEXT:    s_mov_b32 s13, s15
+; GPRIDX-NEXT:    s_mov_b32 s14, s16
+; GPRIDX-NEXT:    s_mov_b32 s15, s17
+; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT:    ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset3:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_mov_b32 s0, s2
+; MOVREL-NEXT:    s_mov_b32 s1, s3
+; MOVREL-NEXT:    s_add_u32 m0, s18, 3
+; MOVREL-NEXT:    s_mov_b32 s2, s4
+; MOVREL-NEXT:    s_mov_b32 s3, s5
+; MOVREL-NEXT:    s_mov_b32 s4, s6
+; MOVREL-NEXT:    s_mov_b32 s5, s7
+; MOVREL-NEXT:    s_mov_b32 s6, s8
+; MOVREL-NEXT:    s_mov_b32 s7, s9
+; MOVREL-NEXT:    s_mov_b32 s8, s10
+; MOVREL-NEXT:    s_mov_b32 s9, s11
+; MOVREL-NEXT:    s_mov_b32 s10, s12
+; MOVREL-NEXT:    s_mov_b32 s11, s13
+; MOVREL-NEXT:    s_mov_b32 s12, s14
+; MOVREL-NEXT:    s_mov_b32 s13, s15
+; MOVREL-NEXT:    s_mov_b32 s14, s16
+; MOVREL-NEXT:    s_mov_b32 s15, s17
+; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT:    ; return to shader part epilog
+entry:
+  %add = add i32 %sel, 3
+  %ext = extractelement <8 x double> %vec, i32 %add
+  ret double %ext
+}
+
+define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset4:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_mov_b32 s0, s2
+; GPRIDX-NEXT:    s_mov_b32 s1, s3
+; GPRIDX-NEXT:    s_add_u32 m0, s18, 4
+; GPRIDX-NEXT:    s_mov_b32 s2, s4
+; GPRIDX-NEXT:    s_mov_b32 s3, s5
+; GPRIDX-NEXT:    s_mov_b32 s4, s6
+; GPRIDX-NEXT:    s_mov_b32 s5, s7
+; GPRIDX-NEXT:    s_mov_b32 s6, s8
+; GPRIDX-NEXT:    s_mov_b32 s7, s9
+; GPRIDX-NEXT:    s_mov_b32 s8, s10
+; GPRIDX-NEXT:    s_mov_b32 s9, s11
+; GPRIDX-NEXT:    s_mov_b32 s10, s12
+; GPRIDX-NEXT:    s_mov_b32 s11, s13
+; GPRIDX-NEXT:    s_mov_b32 s12, s14
+; GPRIDX-NEXT:    s_mov_b32 s13, s15
+; GPRIDX-NEXT:    s_mov_b32 s14, s16
+; GPRIDX-NEXT:    s_mov_b32 s15, s17
+; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT:    ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset4:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_mov_b32 s0, s2
+; MOVREL-NEXT:    s_mov_b32 s1, s3
+; MOVREL-NEXT:    s_add_u32 m0, s18, 4
+; MOVREL-NEXT:    s_mov_b32 s2, s4
+; MOVREL-NEXT:    s_mov_b32 s3, s5
+; MOVREL-NEXT:    s_mov_b32 s4, s6
+; MOVREL-NEXT:    s_mov_b32 s5, s7
+; MOVREL-NEXT:    s_mov_b32 s6, s8
+; MOVREL-NEXT:    s_mov_b32 s7, s9
+; MOVREL-NEXT:    s_mov_b32 s8, s10
+; MOVREL-NEXT:    s_mov_b32 s9, s11
+; MOVREL-NEXT:    s_mov_b32 s10, s12
+; MOVREL-NEXT:    s_mov_b32 s11, s13
+; MOVREL-NEXT:    s_mov_b32 s12, s14
+; MOVREL-NEXT:    s_mov_b32 s13, s15
+; MOVREL-NEXT:    s_mov_b32 s14, s16
+; MOVREL-NEXT:    s_mov_b32 s15, s17
+; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT:    ; return to shader part epilog
+entry:
+  %add = add i32 %sel, 4
+  %ext = extractelement <8 x double> %vec, i32 %add
+  ret double %ext
+}
+define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset5:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_mov_b32 s0, s2
+; GPRIDX-NEXT:    s_mov_b32 s1, s3
+; GPRIDX-NEXT:    s_add_u32 m0, s18, 5
+; GPRIDX-NEXT:    s_mov_b32 s2, s4
+; GPRIDX-NEXT:    s_mov_b32 s3, s5
+; GPRIDX-NEXT:    s_mov_b32 s4, s6
+; GPRIDX-NEXT:    s_mov_b32 s5, s7
+; GPRIDX-NEXT:    s_mov_b32 s6, s8
+; GPRIDX-NEXT:    s_mov_b32 s7, s9
+; GPRIDX-NEXT:    s_mov_b32 s8, s10
+; GPRIDX-NEXT:    s_mov_b32 s9, s11
+; GPRIDX-NEXT:    s_mov_b32 s10, s12
+; GPRIDX-NEXT:    s_mov_b32 s11, s13
+; GPRIDX-NEXT:    s_mov_b32 s12, s14
+; GPRIDX-NEXT:    s_mov_b32 s13, s15
+; GPRIDX-NEXT:    s_mov_b32 s14, s16
+; GPRIDX-NEXT:    s_mov_b32 s15, s17
+; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT:    ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset5:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_mov_b32 s0, s2
+; MOVREL-NEXT:    s_mov_b32 s1, s3
+; MOVREL-NEXT:    s_add_u32 m0, s18, 5
+; MOVREL-NEXT:    s_mov_b32 s2, s4
+; MOVREL-NEXT:    s_mov_b32 s3, s5
+; MOVREL-NEXT:    s_mov_b32 s4, s6
+; MOVREL-NEXT:    s_mov_b32 s5, s7
+; MOVREL-NEXT:    s_mov_b32 s6, s8
+; MOVREL-NEXT:    s_mov_b32 s7, s9
+; MOVREL-NEXT:    s_mov_b32 s8, s10
+; MOVREL-NEXT:    s_mov_b32 s9, s11
+; MOVREL-NEXT:    s_mov_b32 s10, s12
+; MOVREL-NEXT:    s_mov_b32 s11, s13
+; MOVREL-NEXT:    s_mov_b32 s12, s14
+; MOVREL-NEXT:    s_mov_b32 s13, s15
+; MOVREL-NEXT:    s_mov_b32 s14, s16
+; MOVREL-NEXT:    s_mov_b32 s15, s17
+; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT:    ; return to shader part epilog
+entry:
+  %add = add i32 %sel, 5
+  %ext = extractelement <8 x double> %vec, i32 %add
+  ret double %ext
+}
+
+define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset6:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_mov_b32 s0, s2
+; GPRIDX-NEXT:    s_mov_b32 s1, s3
+; GPRIDX-NEXT:    s_add_u32 m0, s18, 6
+; GPRIDX-NEXT:    s_mov_b32 s2, s4
+; GPRIDX-NEXT:    s_mov_b32 s3, s5
+; GPRIDX-NEXT:    s_mov_b32 s4, s6
+; GPRIDX-NEXT:    s_mov_b32 s5, s7
+; GPRIDX-NEXT:    s_mov_b32 s6, s8
+; GPRIDX-NEXT:    s_mov_b32 s7, s9
+; GPRIDX-NEXT:    s_mov_b32 s8, s10
+; GPRIDX-NEXT:    s_mov_b32 s9, s11
+; GPRIDX-NEXT:    s_mov_b32 s10, s12
+; GPRIDX-NEXT:    s_mov_b32 s11, s13
+; GPRIDX-NEXT:    s_mov_b32 s12, s14
+; GPRIDX-NEXT:    s_mov_b32 s13, s15
+; GPRIDX-NEXT:    s_mov_b32 s14, s16
+; GPRIDX-NEXT:    s_mov_b32 s15, s17
+; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT:    ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset6:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_mov_b32 s0, s2
+; MOVREL-NEXT:    s_mov_b32 s1, s3
+; MOVREL-NEXT:    s_add_u32 m0, s18, 6
+; MOVREL-NEXT:    s_mov_b32 s2, s4
+; MOVREL-NEXT:    s_mov_b32 s3, s5
+; MOVREL-NEXT:    s_mov_b32 s4, s6
+; MOVREL-NEXT:    s_mov_b32 s5, s7
+; MOVREL-NEXT:    s_mov_b32 s6, s8
+; MOVREL-NEXT:    s_mov_b32 s7, s9
+; MOVREL-NEXT:    s_mov_b32 s8, s10
+; MOVREL-NEXT:    s_mov_b32 s9, s11
+; MOVREL-NEXT:    s_mov_b32 s10, s12
+; MOVREL-NEXT:    s_mov_b32 s11, s13
+; MOVREL-NEXT:    s_mov_b32 s12, s14
+; MOVREL-NEXT:    s_mov_b32 s13, s15
+; MOVREL-NEXT:    s_mov_b32 s14, s16
+; MOVREL-NEXT:    s_mov_b32 s15, s17
+; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT:    ; return to shader part epilog
+entry:
+  %add = add i32 %sel, 6
+  %ext = extractelement <8 x double> %vec, i32 %add
+  ret double %ext
+}
+define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_mov_b32 s0, s2
+; GPRIDX-NEXT:    s_mov_b32 s1, s3
+; GPRIDX-NEXT:    s_add_u32 m0, s18, 7
+; GPRIDX-NEXT:    s_mov_b32 s2, s4
+; GPRIDX-NEXT:    s_mov_b32 s3, s5
+; GPRIDX-NEXT:    s_mov_b32 s4, s6
+; GPRIDX-NEXT:    s_mov_b32 s5, s7
+; GPRIDX-NEXT:    s_mov_b32 s6, s8
+; GPRIDX-NEXT:    s_mov_b32 s7, s9
+; GPRIDX-NEXT:    s_mov_b32 s8, s10
+; GPRIDX-NEXT:    s_mov_b32 s9, s11
+; GPRIDX-NEXT:    s_mov_b32 s10, s12
+; GPRIDX-NEXT:    s_mov_b32 s11, s13
+; GPRIDX-NEXT:    s_mov_b32 s12, s14
+; GPRIDX-NEXT:    s_mov_b32 s13, s15
+; GPRIDX-NEXT:    s_mov_b32 s14, s16
+; GPRIDX-NEXT:    s_mov_b32 s15, s17
+; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT:    ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_mov_b32 s0, s2
+; MOVREL-NEXT:    s_mov_b32 s1, s3
+; MOVREL-NEXT:    s_add_u32 m0, s18, 7
+; MOVREL-NEXT:    s_mov_b32 s2, s4
+; MOVREL-NEXT:    s_mov_b32 s3, s5
+; MOVREL-NEXT:    s_mov_b32 s4, s6
+; MOVREL-NEXT:    s_mov_b32 s5, s7
+; MOVREL-NEXT:    s_mov_b32 s6, s8
+; MOVREL-NEXT:    s_mov_b32 s7, s9
+; MOVREL-NEXT:    s_mov_b32 s8, s10
+; MOVREL-NEXT:    s_mov_b32 s9, s11
+; MOVREL-NEXT:    s_mov_b32 s10, s12
+; MOVREL-NEXT:    s_mov_b32 s11, s13
+; MOVREL-NEXT:    s_mov_b32 s12, s14
+; MOVREL-NEXT:    s_mov_b32 s13, s15
+; MOVREL-NEXT:    s_mov_b32 s14, s16
+; MOVREL-NEXT:    s_mov_b32 s15, s17
+; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT:    ; return to shader part epilog
+entry:
+  %add = add i32 %sel, 7
+  %ext = extractelement <8 x double> %vec, i32 %add
+  ret double %ext
+}
+
+define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offsetm1:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_mov_b32 s0, s2
+; GPRIDX-NEXT:    s_mov_b32 s1, s3
+; GPRIDX-NEXT:    s_add_u32 m0, s18, -1
+; GPRIDX-NEXT:    s_mov_b32 s2, s4
+; GPRIDX-NEXT:    s_mov_b32 s3, s5
+; GPRIDX-NEXT:    s_mov_b32 s4, s6
+; GPRIDX-NEXT:    s_mov_b32 s5, s7
+; GPRIDX-NEXT:    s_mov_b32 s6, s8
+; GPRIDX-NEXT:    s_mov_b32 s7, s9
+; GPRIDX-NEXT:    s_mov_b32 s8, s10
+; GPRIDX-NEXT:    s_mov_b32 s9, s11
+; GPRIDX-NEXT:    s_mov_b32 s10, s12
+; GPRIDX-NEXT:    s_mov_b32 s11, s13
+; GPRIDX-NEXT:    s_mov_b32 s12, s14
+; GPRIDX-NEXT:    s_mov_b32 s13, s15
+; GPRIDX-NEXT:    s_mov_b32 s14, s16
+; GPRIDX-NEXT:    s_mov_b32 s15, s17
+; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT:    ; return to shader part epilog
+;
+; MOVREL-LABEL: dyn_extract_v8f64_s_s_offsetm1:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_mov_b32 s0, s2
+; MOVREL-NEXT:    s_mov_b32 s1, s3
+; MOVREL-NEXT:    s_add_u32 m0, s18, -1
+; MOVREL-NEXT:    s_mov_b32 s2, s4
+; MOVREL-NEXT:    s_mov_b32 s3, s5
+; MOVREL-NEXT:    s_mov_b32 s4, s6
+; MOVREL-NEXT:    s_mov_b32 s5, s7
+; MOVREL-NEXT:    s_mov_b32 s6, s8
+; MOVREL-NEXT:    s_mov_b32 s7, s9
+; MOVREL-NEXT:    s_mov_b32 s8, s10
+; MOVREL-NEXT:    s_mov_b32 s9, s11
+; MOVREL-NEXT:    s_mov_b32 s10, s12
+; MOVREL-NEXT:    s_mov_b32 s11, s13
+; MOVREL-NEXT:    s_mov_b32 s12, s14
+; MOVREL-NEXT:    s_mov_b32 s13, s15
+; MOVREL-NEXT:    s_mov_b32 s14, s16
+; MOVREL-NEXT:    s_mov_b32 s15, s17
+; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT:    ; return to shader part epilog
+entry:
+  %add = add i32 %sel, -1
+  %ext = extractelement <8 x double> %vec, i32 %add
+  ret double %ext
+}
+
+define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) {
+; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GPRIDX-NEXT:    v_add_u32_e32 v18, 3, v16
+; GPRIDX-NEXT:    s_mov_b64 s[4:5], exec
+; GPRIDX-NEXT:  BB22_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v18
+; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v18
+; GPRIDX-NEXT:    s_lshl_b32 s6, s6, 1
+; GPRIDX-NEXT:    s_add_u32 s7, s6, 1
+; GPRIDX-NEXT:    s_set_gpr_idx_on s6, gpr_idx(SRC0)
+; GPRIDX-NEXT:    v_mov_b32_e32 v16, v0
+; GPRIDX-NEXT:    s_set_gpr_idx_off
+; GPRIDX-NEXT:    s_set_gpr_idx_on s7, gpr_idx(SRC0)
+; GPRIDX-NEXT:    v_mov_b32_e32 v17, v0
+; GPRIDX-NEXT:    s_set_gpr_idx_off
+; GPRIDX-NEXT:    s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT:    s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT:    s_cbranch_execnz BB22_1
+; GPRIDX-NEXT:  ; %bb.2:
+; GPRIDX-NEXT:    s_mov_b64 exec, s[4:5]
+; GPRIDX-NEXT:    v_mov_b32_e32 v0, v16
+; GPRIDX-NEXT:    v_mov_b32_e32 v1, v17
+; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
+;
+; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MOVREL-NEXT:    v_add_u32_e32 v18, vcc, 3, v16
+; MOVREL-NEXT:    s_mov_b64 s[4:5], exec
+; MOVREL-NEXT:  BB22_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT:    v_readfirstlane_b32 s6, v18
+; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v18
+; MOVREL-NEXT:    s_lshl_b32 s6, s6, 1
+; MOVREL-NEXT:    s_mov_b32 m0, s6
+; MOVREL-NEXT:    s_add_u32 s7, s6, 1
+; MOVREL-NEXT:    v_movrels_b32_e32 v16, v0
+; MOVREL-NEXT:    s_mov_b32 m0, s7
+; MOVREL-NEXT:    v_movrels_b32_e32 v17, v0
+; MOVREL-NEXT:    s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT:    s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT:    s_cbranch_execnz BB22_1
+; MOVREL-NEXT:  ; %bb.2:
+; MOVREL-NEXT:    s_mov_b64 exec, s[4:5]
+; MOVREL-NEXT:    v_mov_b32_e32 v0, v16
+; MOVREL-NEXT:    v_mov_b32_e32 v1, v17
+; MOVREL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %add = add i32 %sel, 3
+  %ext = extractelement <8 x double> %vec, i32 %add
+  ret double %ext
+}
+
+define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 %idx) {
+; GPRIDX-LABEL: dyn_extract_v8p3_v_v:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GPRIDX-NEXT:    s_mov_b64 s[4:5], exec
+; GPRIDX-NEXT:  BB23_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v8
+; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v8
+; GPRIDX-NEXT:    s_set_gpr_idx_on s6, gpr_idx(SRC0)
+; GPRIDX-NEXT:    v_mov_b32_e32 v9, v0
+; GPRIDX-NEXT:    s_set_gpr_idx_off
+; GPRIDX-NEXT:    s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT:    s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT:    s_cbranch_execnz BB23_1
+; GPRIDX-NEXT:  ; %bb.2:
+; GPRIDX-NEXT:    s_mov_b64 exec, s[4:5]
+; GPRIDX-NEXT:    v_mov_b32_e32 v0, v9
+; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
+;
+; MOVREL-LABEL: dyn_extract_v8p3_v_v:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MOVREL-NEXT:    s_mov_b64 s[4:5], exec
+; MOVREL-NEXT:  BB23_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT:    v_readfirstlane_b32 s6, v8
+; MOVREL-NEXT:    s_mov_b32 m0, s6
+; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v8
+; MOVREL-NEXT:    v_movrels_b32_e32 v9, v0
+; MOVREL-NEXT:    s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT:    s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT:    s_cbranch_execnz BB23_1
+; MOVREL-NEXT:  ; %bb.2:
+; MOVREL-NEXT:    s_mov_b64 exec, s[4:5]
+; MOVREL-NEXT:    v_mov_b32_e32 v0, v9
+; MOVREL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx
+  ret i8 addrspace(3)* %ext
+}
+
+define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i32 inreg %idx) {
+; GPRIDX-LABEL: dyn_extract_v8p3_s_s:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_mov_b32 s0, s2
+; GPRIDX-NEXT:    s_mov_b32 m0, s10
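Note: pointer-typed vectors reuse the integer machinery by element size. addrspace(3) pointers are 32 bits wide, so the v8p3 tests behave exactly like v8f32 (one relative move per element), while addrspace(1) pointers are 64 bits and split into two dword reads like v8i64. A trivial mapping sketch (illustrative only):

    // Number of 32-bit relative moves needed per element in the tests above.
    unsigned movesPerElement(unsigned eltBits) {
      return eltBits / 32; // 32-bit (p3, f32) -> 1; 64-bit (p1, i64, f64) -> 2
    }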
+; GPRIDX-NEXT:    s_mov_b32 s1, s3
+; GPRIDX-NEXT:    s_mov_b32 s2, s4
+; GPRIDX-NEXT:    s_mov_b32 s3, s5
+; GPRIDX-NEXT:    s_mov_b32 s4, s6
+; GPRIDX-NEXT:    s_mov_b32 s5, s7
+; GPRIDX-NEXT:    s_mov_b32 s6, s8
+; GPRIDX-NEXT:    s_mov_b32 s7, s9
+; GPRIDX-NEXT:    s_movrels_b32 s0, s0
+; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
+; GPRIDX-NEXT:    ds_write_b32 v0, v0
+; GPRIDX-NEXT:    s_endpgm
+;
+; MOVREL-LABEL: dyn_extract_v8p3_s_s:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_mov_b32 s0, s2
+; MOVREL-NEXT:    s_mov_b32 m0, s10
+; MOVREL-NEXT:    s_mov_b32 s1, s3
+; MOVREL-NEXT:    s_mov_b32 s2, s4
+; MOVREL-NEXT:    s_mov_b32 s3, s5
+; MOVREL-NEXT:    s_mov_b32 s4, s6
+; MOVREL-NEXT:    s_mov_b32 s5, s7
+; MOVREL-NEXT:    s_mov_b32 s6, s8
+; MOVREL-NEXT:    s_mov_b32 s7, s9
+; MOVREL-NEXT:    s_movrels_b32 s0, s0
+; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
+; MOVREL-NEXT:    s_mov_b32 m0, -1
+; MOVREL-NEXT:    ds_write_b32 v0, v0
+; MOVREL-NEXT:    s_endpgm
+entry:
+  %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx
+  store i8 addrspace(3)* %ext, i8 addrspace(3)* addrspace(3)* undef
+  ret void
+}
+
+define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %idx) {
+; GPRIDX-LABEL: dyn_extract_v8p1_v_v:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GPRIDX-NEXT:    s_mov_b64 s[4:5], exec
+; GPRIDX-NEXT:  BB25_1: ; =>This Inner Loop Header: Depth=1
+; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v16
+; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v16
+; GPRIDX-NEXT:    s_lshl_b32 s6, s6, 1
+; GPRIDX-NEXT:    s_add_u32 s7, s6, 1
+; GPRIDX-NEXT:    s_set_gpr_idx_on s6, gpr_idx(SRC0)
+; GPRIDX-NEXT:    v_mov_b32_e32 v17, v0
+; GPRIDX-NEXT:    s_set_gpr_idx_off
+; GPRIDX-NEXT:    s_set_gpr_idx_on s7, gpr_idx(SRC0)
+; GPRIDX-NEXT:    v_mov_b32_e32 v18, v0
+; GPRIDX-NEXT:    s_set_gpr_idx_off
+; GPRIDX-NEXT:    s_and_saveexec_b64 vcc, vcc
+; GPRIDX-NEXT:    s_xor_b64 exec, exec, vcc
+; GPRIDX-NEXT:    s_cbranch_execnz BB25_1
+; GPRIDX-NEXT:  ; %bb.2:
+; GPRIDX-NEXT:    s_mov_b64 exec, s[4:5]
+; GPRIDX-NEXT:    v_mov_b32_e32 v0, v17
+; GPRIDX-NEXT:    v_mov_b32_e32 v1, v18
+; GPRIDX-NEXT:    s_setpc_b64 s[30:31]
+;
+; MOVREL-LABEL: dyn_extract_v8p1_v_v:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; MOVREL-NEXT:    s_mov_b64 s[4:5], exec
+; MOVREL-NEXT:  BB25_1: ; =>This Inner Loop Header: Depth=1
+; MOVREL-NEXT:    v_readfirstlane_b32 s6, v16
+; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v16
+; MOVREL-NEXT:    s_lshl_b32 s6, s6, 1
+; MOVREL-NEXT:    s_mov_b32 m0, s6
+; MOVREL-NEXT:    s_add_u32 s7, s6, 1
+; MOVREL-NEXT:    v_movrels_b32_e32 v17, v0
+; MOVREL-NEXT:    s_mov_b32 m0, s7
+; MOVREL-NEXT:    v_movrels_b32_e32 v18, v0
+; MOVREL-NEXT:    s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT:    s_xor_b64 exec, exec, vcc
+; MOVREL-NEXT:    s_cbranch_execnz BB25_1
+; MOVREL-NEXT:  ; %bb.2:
+; MOVREL-NEXT:    s_mov_b64 exec, s[4:5]
+; MOVREL-NEXT:    v_mov_b32_e32 v0, v17
+; MOVREL-NEXT:    v_mov_b32_e32 v1, v18
+; MOVREL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx
+  ret i8 addrspace(1)* %ext
+}
+define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i32 inreg %idx) {
+; GPRIDX-LABEL: dyn_extract_v8p1_s_s:
+; GPRIDX:       ; %bb.0: ; %entry
+; GPRIDX-NEXT:    s_mov_b32 s0, s2
+; GPRIDX-NEXT:    s_mov_b32 s1, s3
+; GPRIDX-NEXT:    s_mov_b32 m0, s18
+; GPRIDX-NEXT:    s_mov_b32 s2, s4
+; GPRIDX-NEXT:    s_mov_b32 s3, s5
+; GPRIDX-NEXT:    s_mov_b32 s4, s6
+; GPRIDX-NEXT:    s_mov_b32 s5, s7
+; GPRIDX-NEXT:    s_mov_b32 s6, s8
+; GPRIDX-NEXT:    s_mov_b32 s7, s9
+; GPRIDX-NEXT:    s_mov_b32 s8, s10
+; GPRIDX-NEXT:    s_mov_b32 s9, s11
+; GPRIDX-NEXT:    s_mov_b32 s10, s12
+; GPRIDX-NEXT:    s_mov_b32 s11, s13
+; GPRIDX-NEXT:    s_mov_b32 s12, s14
+; GPRIDX-NEXT:    s_mov_b32 s13, s15
+; GPRIDX-NEXT:    s_mov_b32 s14, s16
+; GPRIDX-NEXT:    s_mov_b32 s15, s17
+; GPRIDX-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; GPRIDX-NEXT:    v_mov_b32_e32 v0, s0
+; GPRIDX-NEXT:    v_mov_b32_e32 v1, s1
+; GPRIDX-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
+; GPRIDX-NEXT:    s_endpgm
+;
+; MOVREL-LABEL: dyn_extract_v8p1_s_s:
+; MOVREL:       ; %bb.0: ; %entry
+; MOVREL-NEXT:    s_mov_b32 s0, s2
+; MOVREL-NEXT:    s_mov_b32 s1, s3
+; MOVREL-NEXT:    s_mov_b32 m0, s18
+; MOVREL-NEXT:    s_mov_b32 s2, s4
+; MOVREL-NEXT:    s_mov_b32 s3, s5
+; MOVREL-NEXT:    s_mov_b32 s4, s6
+; MOVREL-NEXT:    s_mov_b32 s5, s7
+; MOVREL-NEXT:    s_mov_b32 s6, s8
+; MOVREL-NEXT:    s_mov_b32 s7, s9
+; MOVREL-NEXT:    s_mov_b32 s8, s10
+; MOVREL-NEXT:    s_mov_b32 s9, s11
+; MOVREL-NEXT:    s_mov_b32 s10, s12
+; MOVREL-NEXT:    s_mov_b32 s11, s13
+; MOVREL-NEXT:    s_mov_b32 s12, s14
+; MOVREL-NEXT:    s_mov_b32 s13, s15
+; MOVREL-NEXT:    s_mov_b32 s14, s16
+; MOVREL-NEXT:    s_mov_b32 s15, s17
+; MOVREL-NEXT:    s_movrels_b64 s[0:1], s[0:1]
+; MOVREL-NEXT:    v_mov_b32_e32 v0, s0
+; MOVREL-NEXT:    v_mov_b32_e32 v1, s1
+; MOVREL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
+; MOVREL-NEXT:    s_endpgm
+entry:
+  %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx
+  store i8 addrspace(1)* %ext, i8 addrspace(1)* addrspace(1)* undef
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
new file mode 100644
index 0000000000000..f2d53090f875f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
@@ -0,0 +1,810 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL %s
+# RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-vgpr-index-mode -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GPRIDX %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GPRIDX %s
+
+---
+name: extract_vector_elt_s_s32_v2s32
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2
+
+    ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v2s32
+    ; MOVREL: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; MOVREL: $m0 = COPY [[COPY1]]
+    ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v2s32
+    ; GPRIDX: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GPRIDX: $m0 = COPY [[COPY1]]
+    ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+    ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+    %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1
+    %1:sgpr(s32) = COPY $sgpr2
+    %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: extract_vector_elt_s_s32_v3s32
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2, $sgpr3
+
+    ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v3s32
+    ; MOVREL: [[COPY:%[0-9]+]]:sreg_96 = COPY $sgpr0_sgpr1_sgpr2
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; MOVREL: $m0 = COPY [[COPY1]]
+    ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v3s32
+    ; GPRIDX: [[COPY:%[0-9]+]]:sreg_96 = COPY $sgpr0_sgpr1_sgpr2
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GPRIDX: $m0 = COPY [[COPY1]]
+    ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+    ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+    %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2
+    %1:sgpr(s32) = COPY $sgpr2
+    %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: extract_vector_elt_s_s32_v4s32
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+
+    ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v4s32
+    ; MOVREL: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+    ; MOVREL: $m0 = COPY [[COPY1]]
+    ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v4s32
+    ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+    ; GPRIDX: $m0 = COPY [[COPY1]]
+    ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+    ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+    %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:sgpr(s32) = COPY $sgpr4
+    %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: extract_vector_elt_s_s32_v8s32
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8
+
+    ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32
+    ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; MOVREL: $m0 = COPY [[COPY1]]
+    ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32
+    ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; GPRIDX: $m0 = COPY [[COPY1]]
+    ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+    ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+    %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    %1:sgpr(s32) = COPY $sgpr8
+    %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    S_ENDPGM 0, implicit %2
+...
+ +--- +name: extract_vector_elt_s_s32_v16s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v16s32 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v16s32 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + %0:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: extract_vector_elt_s_s32_v32s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v32s32 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v32s32 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + %0:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %1:sgpr(s32) = COPY $sgpr40 + %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 + S_ENDPGM 0, implicit %2 +... 
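+
+# 64-bit element extracts from an SGPR source vector select to S_MOVRELS_B64,
+# reading the sub0_sub1 subregister pair of the source.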
+ +--- +name: extract_vector_elt_s_s64_v2s64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v2s64 + ; MOVREL: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v2s64 + ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %1:sgpr(s32) = COPY $sgpr4 + %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: extract_vector_elt_s_s64_v4s64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v4s64 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v4s64 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + %0:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: extract_vector_elt_s_s64_v8s64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: extract_vector_elt_s_s64_v16s64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr40 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v16s64 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v16s64 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; GPRIDX: $m0 = COPY [[COPY1]] + ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + %0:sgpr(<16 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %1:sgpr(s32) = COPY $sgpr40 + %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %1 + S_ENDPGM 0, implicit %2 +... 
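+
+# The cases below apply a constant offset to the index. As selected here, the
+# constant is materialized with S_MOV_B32 and added with S_ADD_U32 before the
+# copy to m0; it is not folded into a fixed subregister index.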
+ +--- +name: extract_vector_elt_s_s32_v8s32_idx_offset_1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] + ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s32) = G_CONSTANT i32 1 + %3:sgpr(s32) = G_ADD %1, %2 + %4:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3 + S_ENDPGM 0, implicit %4 +... + +--- +name: extract_vector_elt_s_s32_v8s32_idx_offset_m1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_m1 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_m1 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] + ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s32) = G_CONSTANT i32 -1 + %3:sgpr(s32) = G_ADD %1, %2 + %4:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3 + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: extract_vector_elt_s_s32_v8s32_idx_offset_7 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7 + ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7 + ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] + ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s32) = G_CONSTANT i32 7 + %3:sgpr(s32) = G_ADD %1, %2 + %4:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3 + S_ENDPGM 0, implicit %4 +... + +--- +name: extract_vector_elt_s_s32_v8s32_idx_offset_8 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_8 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 + ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_8 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 + ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] + ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s32) = G_CONSTANT i32 8 + %3:sgpr(s32) = G_ADD %1, %2 + %4:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3 + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: extract_vector_elt_s_s64_v8s64_idx_offset_1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] + ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s32) = G_CONSTANT i32 1 + %3:sgpr(s32) = G_ADD %1, %2 + %4:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %3 + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: extract_vector_elt_s_s64_v8s64_idx_offset_2 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 + ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2 + ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] + ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s32) = G_CONSTANT i32 2 + %3:sgpr(s32) = G_ADD %1, %2 + %4:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %3 + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: extract_vector_elt_s_s64_v8s64_idx_offset_m1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; MOVREL-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_m1 + ; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_m1 + ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] + ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64_xexec = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s32) = G_CONSTANT i32 -1 + %3:sgpr(s32) = G_ADD %1, %2 + %4:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %3 + S_ENDPGM 0, implicit %4 +... + +--- +name: extract_vector_elt_v_s32_v2s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $sgpr2 + + ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v2s32 + ; MOVREL: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v2s32 + ; GPRIDX: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX: S_SET_GPR_IDX_OFF + ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:sgpr(s32) = COPY $sgpr2 + %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 + S_ENDPGM 0, implicit %2 +... 
+
+---
+name: extract_vector_elt_v_s32_v3s32
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2, $sgpr3
+
+    ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v3s32
+    ; MOVREL: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+    ; MOVREL: $m0 = COPY [[COPY1]]
+    ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v3s32
+    ; GPRIDX: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+    ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
+    ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+    ; GPRIDX: S_SET_GPR_IDX_OFF
+    ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    %0:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:sgpr(s32) = COPY $sgpr3
+    %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: extract_vector_elt_v_s32_v4s32
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4
+
+    ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v4s32
+    ; MOVREL: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+    ; MOVREL: $m0 = COPY [[COPY1]]
+    ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v4s32
+    ; GPRIDX: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+    ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
+    ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+    ; GPRIDX: S_SET_GPR_IDX_OFF
+    ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    %0:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:sgpr(s32) = COPY $sgpr4
+    %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: extract_vector_elt_v_s32_v8s32
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8
+
+    ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32
+    ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; MOVREL: $m0 = COPY [[COPY1]]
+    ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32
+    ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
+    ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+    ; GPRIDX: S_SET_GPR_IDX_OFF
+    ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:sgpr(s32) = COPY $sgpr8
+    %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: extract_vector_elt_v_s32_v16s32
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr8
+
+    ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v16s32
+    ; MOVREL: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; MOVREL: $m0 = COPY [[COPY1]]
+    ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+    ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+    ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v16s32
+    ; GPRIDX: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+    ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0
+    ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+    ; GPRIDX: S_SET_GPR_IDX_OFF
+    ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    %0:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    %1:sgpr(s32) = COPY $sgpr8
+    %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    S_ENDPGM 0, implicit %2
+...
+ +--- +name: extract_vector_elt_v_s32_v32s32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $sgpr40 + + ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v32s32 + ; MOVREL: [[COPY:%[0-9]+]]:vreg_1024 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; MOVREL: $m0 = COPY [[COPY1]] + ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v32s32 + ; GPRIDX: [[COPY:%[0-9]+]]:vreg_1024 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; GPRIDX: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX: S_SET_GPR_IDX_OFF + ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + %0:vgpr(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + %1:sgpr(s32) = COPY $sgpr40 + %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 + S_ENDPGM 0, implicit %2 +... 
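+
+# VGPR-source cases with a constant index offset: the scalar add result feeds
+# m0 for V_MOVRELS_B32 or S_SET_GPR_IDX_ON when VGPR indexing mode is used.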
+ +--- +name: extract_vector_elt_v_s32_v8s32_idx_offset_1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 + + ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1 + ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1 + ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX: S_SET_GPR_IDX_OFF + ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s32) = G_CONSTANT i32 1 + %3:sgpr(s32) = G_ADD %1, %2 + %4:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3 + S_ENDPGM 0, implicit %4 +... + +--- +name: extract_vector_elt_v_s32_v8s32_idx_offset_m1 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 + + ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1 + ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1 + ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX: S_SET_GPR_IDX_OFF + ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s32) = G_CONSTANT i32 -1 + %3:sgpr(s32) = G_ADD %1, %2 + %4:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3 + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: extract_vector_elt_v_s32_v8s32_idx_offset_7 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 + + ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7 + ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7 + ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7 + ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 7 + ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX: S_SET_GPR_IDX_OFF + ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s32) = G_CONSTANT i32 7 + %3:sgpr(s32) = G_ADD %1, %2 + %4:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3 + S_ENDPGM 0, implicit %4 +... + +--- +name: extract_vector_elt_v_s32_v8s32_idx_offset_8 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $sgpr8 + + ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_8 + ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 + ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_8 + ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 + ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX: S_SET_GPR_IDX_OFF + ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:sgpr(s32) = COPY $sgpr8 + %2:sgpr(s32) = G_CONSTANT i32 8 + %3:sgpr(s32) = G_ADD %1, %2 + %4:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3 + S_ENDPGM 0, implicit %4 +...