diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 03d16fdd54c42..dfa21515838ff 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -1241,46 +1241,123 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, (ScalarSize == 16 || ScalarSize == 8)) { // Larger vector widths may require additional instructions, but are // typically cheaper than scalarized versions. - unsigned NumVectorElts = cast(SrcTy)->getNumElements(); - unsigned RequestedElts = - count_if(Mask, [](int MaskElt) { return MaskElt != -1; }); + // + // We assume that shuffling at a register granularity can be done for free. + // This is not true for vectors fed into memory instructions, but it is + // effectively true for all other shuffling. The emphasis of the logic here + // is to assist generic transform in cleaning up / canonicalizing those + // shuffles. + + // With op_sel VOP3P instructions freely can access the low half or high + // half of a register, so any swizzle of two elements is free. + if (auto *SrcVecTy = dyn_cast(SrcTy)) { + unsigned NumSrcElts = SrcVecTy->getNumElements(); + if (ST->hasVOP3PInsts() && ScalarSize == 16 && NumSrcElts == 2 && + (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Reverse || + Kind == TTI::SK_PermuteSingleSrc)) + return 0; + } + unsigned EltsPerReg = 32 / ScalarSize; - if (RequestedElts == 0) - return 0; switch (Kind) { case TTI::SK_Broadcast: + // A single v_perm_b32 can be re-used for all destination registers. + return 1; case TTI::SK_Reverse: - case TTI::SK_PermuteSingleSrc: { - // With op_sel VOP3P instructions freely can access the low half or high - // half of a register, so any swizzle of two elements is free. - if (ST->hasVOP3PInsts() && ScalarSize == 16 && NumVectorElts == 2) - return 0; - unsigned NumPerms = alignTo(RequestedElts, EltsPerReg) / EltsPerReg; - // SK_Broadcast just reuses the same mask - unsigned NumPermMasks = Kind == TTI::SK_Broadcast ? 1 : NumPerms; - return NumPerms + NumPermMasks; - } + // One instruction per register. + if (auto *DstVecTy = dyn_cast(DstTy)) + return divideCeil(DstVecTy->getNumElements(), EltsPerReg); + return InstructionCost::getInvalid(); case TTI::SK_ExtractSubvector: + if (Index % EltsPerReg == 0) + return 0; // Shuffling at register granularity + if (auto *DstVecTy = dyn_cast(DstTy)) + return divideCeil(DstVecTy->getNumElements(), EltsPerReg); + return InstructionCost::getInvalid(); case TTI::SK_InsertSubvector: { - // Even aligned accesses are free - if (!(Index % 2)) - return 0; - // Insert/extract subvectors only require shifts / extract code to get the - // relevant bits - return alignTo(RequestedElts, EltsPerReg) / EltsPerReg; + auto *DstVecTy = dyn_cast(DstTy); + if (!DstVecTy) + return InstructionCost::getInvalid(); + unsigned NumDstElts = DstVecTy->getNumElements(); + unsigned NumInsertElts = cast(SubTp)->getNumElements(); + unsigned EndIndex = Index + NumInsertElts; + unsigned BeginSubIdx = Index % EltsPerReg; + unsigned EndSubIdx = EndIndex % EltsPerReg; + unsigned Cost = 0; + + if (BeginSubIdx != 0) { + // Need to shift the inserted vector into place. The cost is the number + // of destination registers overlapped by the inserted vector. + Cost = divideCeil(EndIndex, EltsPerReg) - (Index / EltsPerReg); + } + + // If the last register overlap is partial, there may be three source + // registers feeding into it; that takes an extra instruction. + if (EndIndex < NumDstElts && BeginSubIdx < EndSubIdx) + Cost += 1; + + return Cost; } - case TTI::SK_PermuteTwoSrc: - case TTI::SK_Splice: - case TTI::SK_Select: { - unsigned NumPerms = alignTo(RequestedElts, EltsPerReg) / EltsPerReg; - // SK_Select just reuses the same mask - unsigned NumPermMasks = Kind == TTI::SK_Select ? 1 : NumPerms; - return NumPerms + NumPermMasks; + case TTI::SK_Splice: { + auto *DstVecTy = dyn_cast(DstTy); + if (!DstVecTy) + return InstructionCost::getInvalid(); + unsigned NumElts = DstVecTy->getNumElements(); + assert(NumElts == cast(SrcTy)->getNumElements()); + // Determine the sub-region of the result vector that requires + // sub-register shuffles / mixing. + unsigned EltsFromLHS = NumElts - Index; + bool LHSIsAligned = (Index % EltsPerReg) == 0; + bool RHSIsAligned = (EltsFromLHS % EltsPerReg) == 0; + if (LHSIsAligned && RHSIsAligned) + return 0; + if (LHSIsAligned && !RHSIsAligned) + return divideCeil(NumElts, EltsPerReg) - (EltsFromLHS / EltsPerReg); + if (!LHSIsAligned && RHSIsAligned) + return divideCeil(EltsFromLHS, EltsPerReg); + return divideCeil(NumElts, EltsPerReg); } - default: break; } + + if (!Mask.empty()) { + unsigned NumSrcElts = cast(SrcTy)->getNumElements(); + + // Generically estimate the cost by assuming that each destination + // register is derived from sources via v_perm_b32 instructions if it + // can't be copied as-is. + // + // For each destination register, derive the cost of obtaining it based + // on the number of source registers that feed into it. + unsigned Cost = 0; + for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx += EltsPerReg) { + SmallVector Regs; + bool Aligned = true; + for (unsigned I = 0; I < EltsPerReg && DstIdx + I < Mask.size(); ++I) { + int SrcIdx = Mask[DstIdx + I]; + if (SrcIdx == -1) + continue; + int Reg; + if (SrcIdx < (int)NumSrcElts) { + Reg = SrcIdx / EltsPerReg; + if (SrcIdx % EltsPerReg != I) + Aligned = false; + } else { + Reg = NumSrcElts + (SrcIdx - NumSrcElts) / EltsPerReg; + if ((SrcIdx - NumSrcElts) % EltsPerReg != I) + Aligned = false; + } + if (!llvm::is_contained(Regs, Reg)) + Regs.push_back(Reg); + } + if (Regs.size() >= 2) + Cost += Regs.size() - 1; + else if (!Aligned) + Cost += 1; + } + return Cost; + } } return BaseT::getShuffleCost(Kind, DstTy, SrcTy, Mask, CostKind, Index, diff --git a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll index b66e19e8bc563..78d43e8949269 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll @@ -13,153 +13,153 @@ define amdgpu_kernel void @shufflevector_i16(<2 x i16> %vec1, <2 x i16> %vec2) { ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf33 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf13 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf000 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> zeroinitializer -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf111 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> zeroinitializer +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf00_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> zeroinitializer ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf10_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf11_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf22_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf33_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf13_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf000_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> zeroinitializer -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf111_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> zeroinitializer +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; VI-LABEL: 'shufflevector_i16' -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf00 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> zeroinitializer +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> zeroinitializer ; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf10 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf11 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf22 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf33 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf13 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf33 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf000 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> zeroinitializer -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf111 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf00_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> zeroinitializer +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf32 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> zeroinitializer +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> zeroinitializer ; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf10_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf11_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf22_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf33_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf13_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf33_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf000_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> zeroinitializer -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf111_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf32_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> zeroinitializer +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX9-10-SIZE-LABEL: 'shufflevector_i16' @@ -167,153 +167,153 @@ define amdgpu_kernel void @shufflevector_i16(<2 x i16> %vec1, <2 x i16> %vec2) { ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf33 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf13 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf000 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> zeroinitializer -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf111 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> zeroinitializer +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf00_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> zeroinitializer ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf10_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf11_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf22_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf33_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf13_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf000_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> zeroinitializer -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf111_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> zeroinitializer +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; VI-SIZE-LABEL: 'shufflevector_i16' -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf00 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> zeroinitializer +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> zeroinitializer ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf10 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf11 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf22 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf33 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf13 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf33 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf000 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> zeroinitializer -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf111 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf00_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> zeroinitializer +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf32 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> zeroinitializer +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> zeroinitializer ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf02_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf10_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf11_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf22_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf33_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf13_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf33_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf000_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> zeroinitializer -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shuf111_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf222_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf32_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> zeroinitializer +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf010_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221_2 = shufflevector <2 x i16> %vec1, <2 x i16> %vec2, <3 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %shuf00 = shufflevector <2 x i16> %vec1, <2 x i16> %vec1, <2 x i32> zeroinitializer @@ -396,157 +396,157 @@ define amdgpu_kernel void @shufflevector_i16(<2 x i16> %vec1, <2 x i16> %vec2) { ; Should not assert define amdgpu_kernel void @shufflevector_i8(<2 x i8> %vec1, <2 x i8> %vec2) { ; ALL-LABEL: 'shufflevector_i8' -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf00 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> zeroinitializer +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> zeroinitializer ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf02 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf10 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf11 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf22 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf33 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf13 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf33 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf000 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> zeroinitializer -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf010 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf011 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf100 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf110 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf111 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf002 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf020 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf022 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf200 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf202 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf220 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf222 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf112 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf122 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf212 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf00_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> zeroinitializer +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf32 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> zeroinitializer +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf101 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf211 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> zeroinitializer ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf02_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf10_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf11_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf22_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf33_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf13_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf33_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf000_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> zeroinitializer -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf010_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf011_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf100_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf110_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf111_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf002_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf020_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf022_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf200_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf202_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf220_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf222_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf112_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf122_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf212_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf32_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> zeroinitializer +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf101_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf211_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'shufflevector_i8' -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf00 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> zeroinitializer +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> zeroinitializer ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf02 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf10 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf11 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf22 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf33 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf13 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf33 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf000 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> zeroinitializer -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf010 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf011 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf100 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf110 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf111 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf002 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf020 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf022 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf200 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf202 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf220 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf222 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf112 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf122 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf212 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf00_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> zeroinitializer +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf32 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> zeroinitializer +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf101 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf211 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf00_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> zeroinitializer ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf01_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf10_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf11_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf02_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf10_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf11_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf02_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf20_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf22_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf03_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf30_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf33_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf12_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf21_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf13_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf22_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf03_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf30_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf33_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf12_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf21_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf13_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf31_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf000_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> zeroinitializer -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf001_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf010_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf011_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf100_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf101_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf110_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf111_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf002_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf020_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf022_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf200_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf202_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf220_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf222_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf112_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf121_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf122_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf211_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf212_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf221_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf32_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> zeroinitializer +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf100_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf101_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf110_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf111_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf002_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf020_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf022_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf200_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf202_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf220_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf222_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf112_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf121_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf122_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf211_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf212_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf221_2 = shufflevector <2 x i8> %vec1, <2 x i8> %vec2, <3 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %shuf00 = shufflevector <2 x i8> %vec1, <2 x i8> %vec1, <2 x i32> zeroinitializer @@ -861,28 +861,30 @@ define amdgpu_kernel void @shufflevector_i32(<2 x i32> %vec1, <2 x i32> %vec2) { ; Other shuffle cases define void @shuffle(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> %i8v4_2, <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i8> %i8v8, <8 x i8> %i8v8_2, <16 x i8> %i8v16, <16 x i8> %i8v16_2, <2 x i16> %i16v2, <2 x i16> %i16v2_2, <4 x i16> %i16v4, <4 x i16> %i16v4_2, <8 x i16> %i16v8, <8 x i16> %i16v8_2, <2 x i32> %i32v2, <2 x i32> %i32v2_2, <4 x i32> %i32v4, <4 x i32> %i32v4_2, <2 x float> %floatv2, <2 x float> %floatv2_2, <4 x float> %floatv4, <4 x float> %floatv4_2,<2 x i64> %i64v2, <2 x i64> %i64v2_2,<2 x double> %doublev2, <2 x double> %doublev2_2) { ; GFX9-10-LABEL: 'shuffle' -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_2_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_4 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_4_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_4 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <4 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_4_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <4 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <8 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <8 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i8_8 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6, <8 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i8_8_2 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <8 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_16 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16, <16 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_16_2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_2_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <2 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_4 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <4 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_4_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <4 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6i8_8 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6i8_8_2 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i8_8_2_a = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8_2_b = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_16 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16, <16 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_16_2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_2_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2_2, <2 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16_4 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4, <4 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16_4_2 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4_2, <4 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_8 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8, <8 x i32> -; GFX9-10-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_8_2 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8_2, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_4 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4, <4 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_4_2 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4_2, <4 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_8 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8, <8 x i32> +; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_8_2 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8_2, <8 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2 = shufflevector <2 x i32> %i32v2, <2 x i32> %i32v2, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2_2 = shufflevector <2 x i32> %i32v2, <2 x i32> %i32v2_2, <2 x i32> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_4 = shufflevector <4 x i32> %i32v4, <4 x i32> %i32v4, <4 x i32> @@ -898,28 +900,30 @@ define void @shuffle(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> ; GFX9-10-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; VI-LABEL: 'shuffle' -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_2_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_4 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_4_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_4 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <4 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_4_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <4 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <8 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <8 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i8_8 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6, <8 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i8_8_2 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <8 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_16 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16, <16 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_16_2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_2_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2_2, <2 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16_4 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4, <4 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16_4_2 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4_2, <4 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_8 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8, <8 x i32> -; VI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_8_2 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8_2, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_2_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_4 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <4 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_4_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <4 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6i8_8 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6i8_8_2 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i8_8_2_a = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8_2_b = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_16 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16, <16 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_16_2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_2_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2_2, <2 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_4 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4, <4 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_4_2 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4_2, <4 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_8 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8, <8 x i32> +; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_8_2 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8_2, <8 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2 = shufflevector <2 x i32> %i32v2, <2 x i32> %i32v2, <2 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2_2 = shufflevector <2 x i32> %i32v2, <2 x i32> %i32v2_2, <2 x i32> ; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_4 = shufflevector <4 x i32> %i32v4, <4 x i32> %i32v4, <4 x i32> @@ -935,28 +939,30 @@ define void @shuffle(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> ; VI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX9-10-SIZE-LABEL: 'shuffle' -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_2_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_4 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_4_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_4 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <4 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_4_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <4 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <8 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <8 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i8_8 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6, <8 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i8_8_2 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <8 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_16 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16, <16 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_16_2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_2_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <2 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_4 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <4 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_4_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <4 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6i8_8 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6i8_8_2 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i8_8_2_a = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8_2_b = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_16 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16, <16 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_16_2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_2_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2_2, <2 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16_4 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4, <4 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16_4_2 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4_2, <4 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_8 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8, <8 x i32> -; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_8_2 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8_2, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_4 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4, <4 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_4_2 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4_2, <4 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_8 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8, <8 x i32> +; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_8_2 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8_2, <8 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2 = shufflevector <2 x i32> %i32v2, <2 x i32> %i32v2, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2_2 = shufflevector <2 x i32> %i32v2, <2 x i32> %i32v2_2, <2 x i32> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_4 = shufflevector <4 x i32> %i32v4, <4 x i32> %i32v4, <4 x i32> @@ -972,28 +978,30 @@ define void @shuffle(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> ; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; VI-SIZE-LABEL: 'shuffle' -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_2_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_4 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_4_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_4 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <4 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_4_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <4 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <8 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i8_8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <8 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8_8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i8_8 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6, <8 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i8_8_2 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <8 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_16 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16, <16 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8_16_2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_2_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2_2, <2 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16_4 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4, <4 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16_4_2 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4_2, <4 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_8 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8, <8 x i32> -; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16_8_2 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8_2, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_2_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_4 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <4 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_4_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <4 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6i8_8 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v6i8_8_2 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i8_8_2_a = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8_8_2_b = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_16 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16, <16 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_16_2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_2_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2_2, <2 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_4 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4, <4 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_4_2 = shufflevector <4 x i16> %i16v4, <4 x i16> %i16v4_2, <4 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_8 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8, <8 x i32> +; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_8_2 = shufflevector <8 x i16> %i16v8, <8 x i16> %i16v8_2, <8 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2 = shufflevector <2 x i32> %i32v2, <2 x i32> %i32v2, <2 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2_2 = shufflevector <2 x i32> %i32v2, <2 x i32> %i32v2_2, <2 x i32> ; VI-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_4 = shufflevector <4 x i32> %i32v4, <4 x i32> %i32v4, <4 x i32> @@ -1022,6 +1030,8 @@ define void @shuffle(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> %v6i8_8_2 = shufflevector <6 x i8> %i8v6, <6 x i8> %i8v6_2, <8 x i32> %v8i8_8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <8 x i32> %v8i8_8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> + %v8i8_8_2_a = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> + %v8i8_8_2_b = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <8 x i32> %v16i8_16 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16, <16 x i32> %v16i8_16_2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> %v2i16_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2, <2 x i32> @@ -1047,7 +1057,7 @@ define void @shuffle(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> define void @concat(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i8> %i8v8, <8 x i8> %i8v8_2, <2 x half> %halfv2, <2 x half> %halfv2_2, <4 x half> %halfv4, <4 x half> %halfv4_2, <8 x half> %halfv8, <8 x half> %halfv8_2, <2 x i16> %i16v2, <2 x i16> %i16v2_2, <4 x i16> %i16v4, <4 x i16> %i16v4_2, <8 x i16> %i16v8, <8 x i16> %i16v8_2, <2 x i32> %i32v2, <2 x i32> %i32v2_2, <4 x i32> %i32v4, <4 x i32> %i32v4_2, <2 x float> %floatv2, <2 x float> %floatv2_2, <4 x float> %floatv4, <4 x float> %floatv4_2,<2 x i64> %i64v2, <2 x i64> %i64v2_2,<2 x double> %doublev2, <2 x double> %doublev2_2) { ; ALL-LABEL: 'concat' -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <16 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2, <4 x i32> @@ -1062,7 +1072,7 @@ define void @concat(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> % ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = shufflevector <2 x float> %floatv2, <2 x float> %floatv2, <4 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = shufflevector <4 x float> %floatv4, <4 x float> %floatv4, <8 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = shufflevector <2 x double> %doublev2, <2 x double> %doublev2, <4 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <16 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2_2, <4 x i32> @@ -1080,7 +1090,7 @@ define void @concat(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> % ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'concat' -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2, <4 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i8 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4, <8 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i8 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8, <16 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2, <4 x i32> @@ -1095,7 +1105,7 @@ define void @concat(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> % ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = shufflevector <2 x float> %floatv2, <2 x float> %floatv2, <4 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = shufflevector <4 x float> %floatv4, <4 x float> %floatv4, <8 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = shufflevector <2 x double> %doublev2, <2 x double> %doublev2, <4 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_2 = shufflevector <2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i8_2 = shufflevector <4 x i8> %i8v4, <4 x i8> %i8v4_2, <8 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i8_2 = shufflevector <8 x i8> %i8v8, <8 x i8> %i8v8_2, <16 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16_2 = shufflevector <2 x i16> %i16v2, <2 x i16> %i16v2_2, <4 x i32> @@ -1147,25 +1157,25 @@ define void @concat(<2 x i8> %i8v2, <2 x i8> %i8v2_2, <4 x i8> %i8v4, <4 x i8> % define void @insert(<16 x i8> %i8v16, <16 x i8> %i8v16_2) { ; ALL-LABEL: 'insert' -; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %test0 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %test1 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %test2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %test0 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %test1 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %test2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %test3 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %test4 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %test4 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %test5 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %test6 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %test7 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %test6 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %test7 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'insert' -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %test0 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %test1 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %test2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %test0 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %test1 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %test2 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %test3 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %test4 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %test4 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %test5 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %test6 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %test7 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %test6 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %test7 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %test0 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> @@ -1181,13 +1191,13 @@ define void @insert(<16 x i8> %i8v16, <16 x i8> %i8v16_2) { define void @splice(<16 x i8> %i8v16, <16 x i8> %i8v16_2) { ; ALL-LABEL: 'splice' -; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %test0 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> -; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %test1 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %test0 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %test1 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> ; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; ALL-SIZE-LABEL: 'splice' -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %test0 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %test1 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %test0 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %test1 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %test0 = shufflevector <16 x i8> %i8v16, <16 x i8> %i8v16_2, <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/reduction.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/reduction.ll index b2246e4f9c6c4..126d195314772 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/reduction.ll @@ -3,10 +3,21 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,dce < %s | FileCheck -check-prefixes=GCN,VI %s define half @reduction_half4(<4 x half> %a) { -; GCN-LABEL: @reduction_half4( -; GCN-NEXT: entry: -; GCN-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[A:%.*]]) -; GCN-NEXT: ret half [[TMP0]] +; GFX9-LABEL: @reduction_half4( +; GFX9-NEXT: entry: +; GFX9-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[A:%.*]]) +; GFX9-NEXT: ret half [[TMP0]] +; +; VI-LABEL: @reduction_half4( +; VI-NEXT: entry: +; VI-NEXT: [[ELT0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0 +; VI-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[A]], i64 1 +; VI-NEXT: [[ELT2:%.*]] = extractelement <4 x half> [[A]], i64 2 +; VI-NEXT: [[ELT3:%.*]] = extractelement <4 x half> [[A]], i64 3 +; VI-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]] +; VI-NEXT: [[ADD2:%.*]] = fadd fast half [[ELT2]], [[ADD1]] +; VI-NEXT: [[ADD3:%.*]] = fadd fast half [[ELT3]], [[ADD2]] +; VI-NEXT: ret half [[ADD3]] ; entry: %elt0 = extractelement <4 x half> %a, i64 0 @@ -22,10 +33,29 @@ entry: } define half @reduction_half8(<8 x half> %vec8) { -; GCN-LABEL: @reduction_half8( -; GCN-NEXT: entry: -; GCN-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> [[VEC8:%.*]]) -; GCN-NEXT: ret half [[TMP0]] +; GFX9-LABEL: @reduction_half8( +; GFX9-NEXT: entry: +; GFX9-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> [[VEC8:%.*]]) +; GFX9-NEXT: ret half [[TMP0]] +; +; VI-LABEL: @reduction_half8( +; VI-NEXT: entry: +; VI-NEXT: [[ELT0:%.*]] = extractelement <8 x half> [[VEC8:%.*]], i64 0 +; VI-NEXT: [[ELT1:%.*]] = extractelement <8 x half> [[VEC8]], i64 1 +; VI-NEXT: [[ELT2:%.*]] = extractelement <8 x half> [[VEC8]], i64 2 +; VI-NEXT: [[ELT3:%.*]] = extractelement <8 x half> [[VEC8]], i64 3 +; VI-NEXT: [[ELT4:%.*]] = extractelement <8 x half> [[VEC8]], i64 4 +; VI-NEXT: [[ELT5:%.*]] = extractelement <8 x half> [[VEC8]], i64 5 +; VI-NEXT: [[ELT6:%.*]] = extractelement <8 x half> [[VEC8]], i64 6 +; VI-NEXT: [[ELT7:%.*]] = extractelement <8 x half> [[VEC8]], i64 7 +; VI-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]] +; VI-NEXT: [[ADD2:%.*]] = fadd fast half [[ELT2]], [[ADD1]] +; VI-NEXT: [[ADD3:%.*]] = fadd fast half [[ELT3]], [[ADD2]] +; VI-NEXT: [[ADD4:%.*]] = fadd fast half [[ELT4]], [[ADD3]] +; VI-NEXT: [[ADD5:%.*]] = fadd fast half [[ELT5]], [[ADD4]] +; VI-NEXT: [[ADD6:%.*]] = fadd fast half [[ELT6]], [[ADD5]] +; VI-NEXT: [[ADD7:%.*]] = fadd fast half [[ELT7]], [[ADD6]] +; VI-NEXT: ret half [[ADD7]] ; entry: %elt0 = extractelement <8 x half> %vec8, i64 0 @@ -56,10 +86,37 @@ define half @reduction_half16(<16 x half> %vec16) { ; ; VI-LABEL: @reduction_half16( ; VI-NEXT: entry: -; VI-NEXT: [[TMP0:%.*]] = shufflevector <16 x half> [[VEC16:%.*]], <16 x half> poison, <8 x i32> -; VI-NEXT: [[TMP2:%.*]] = shufflevector <16 x half> [[VEC16]], <16 x half> poison, <8 x i32> -; VI-NEXT: [[RDX_OP:%.*]] = fadd fast <8 x half> [[TMP0]], [[TMP2]] -; VI-NEXT: [[OP_RDX:%.*]] = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> [[RDX_OP]]) +; VI-NEXT: [[ELT0:%.*]] = extractelement <16 x half> [[VEC16:%.*]], i64 0 +; VI-NEXT: [[ELT1:%.*]] = extractelement <16 x half> [[VEC16]], i64 1 +; VI-NEXT: [[ELT2:%.*]] = extractelement <16 x half> [[VEC16]], i64 2 +; VI-NEXT: [[ELT3:%.*]] = extractelement <16 x half> [[VEC16]], i64 3 +; VI-NEXT: [[ELT4:%.*]] = extractelement <16 x half> [[VEC16]], i64 4 +; VI-NEXT: [[ELT5:%.*]] = extractelement <16 x half> [[VEC16]], i64 5 +; VI-NEXT: [[ELT6:%.*]] = extractelement <16 x half> [[VEC16]], i64 6 +; VI-NEXT: [[ELT7:%.*]] = extractelement <16 x half> [[VEC16]], i64 7 +; VI-NEXT: [[ELT8:%.*]] = extractelement <16 x half> [[VEC16]], i64 8 +; VI-NEXT: [[ELT9:%.*]] = extractelement <16 x half> [[VEC16]], i64 9 +; VI-NEXT: [[ELT10:%.*]] = extractelement <16 x half> [[VEC16]], i64 10 +; VI-NEXT: [[ELT11:%.*]] = extractelement <16 x half> [[VEC16]], i64 11 +; VI-NEXT: [[ELT12:%.*]] = extractelement <16 x half> [[VEC16]], i64 12 +; VI-NEXT: [[ELT13:%.*]] = extractelement <16 x half> [[VEC16]], i64 13 +; VI-NEXT: [[ELT14:%.*]] = extractelement <16 x half> [[VEC16]], i64 14 +; VI-NEXT: [[ELT15:%.*]] = extractelement <16 x half> [[VEC16]], i64 15 +; VI-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]] +; VI-NEXT: [[ADD2:%.*]] = fadd fast half [[ELT2]], [[ADD1]] +; VI-NEXT: [[ADD3:%.*]] = fadd fast half [[ELT3]], [[ADD2]] +; VI-NEXT: [[ADD4:%.*]] = fadd fast half [[ELT4]], [[ADD3]] +; VI-NEXT: [[ADD5:%.*]] = fadd fast half [[ELT5]], [[ADD4]] +; VI-NEXT: [[ADD6:%.*]] = fadd fast half [[ELT6]], [[ADD5]] +; VI-NEXT: [[ADD7:%.*]] = fadd fast half [[ELT7]], [[ADD6]] +; VI-NEXT: [[ADD8:%.*]] = fadd fast half [[ELT8]], [[ADD7]] +; VI-NEXT: [[ADD9:%.*]] = fadd fast half [[ELT9]], [[ADD8]] +; VI-NEXT: [[ADD10:%.*]] = fadd fast half [[ELT10]], [[ADD9]] +; VI-NEXT: [[ADD11:%.*]] = fadd fast half [[ELT11]], [[ADD10]] +; VI-NEXT: [[ADD12:%.*]] = fadd fast half [[ELT12]], [[ADD11]] +; VI-NEXT: [[ADD13:%.*]] = fadd fast half [[ELT13]], [[ADD12]] +; VI-NEXT: [[ADD14:%.*]] = fadd fast half [[ELT14]], [[ADD13]] +; VI-NEXT: [[OP_RDX:%.*]] = fadd fast half [[ELT15]], [[ADD14]] ; VI-NEXT: ret half [[OP_RDX]] ; entry: @@ -126,10 +183,21 @@ entry: } define i16 @reduction_v4i16(<4 x i16> %a) { -; GCN-LABEL: @reduction_v4i16( -; GCN-NEXT: entry: -; GCN-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[A:%.*]]) -; GCN-NEXT: ret i16 [[TMP0]] +; GFX9-LABEL: @reduction_v4i16( +; GFX9-NEXT: entry: +; GFX9-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[A:%.*]]) +; GFX9-NEXT: ret i16 [[TMP0]] +; +; VI-LABEL: @reduction_v4i16( +; VI-NEXT: entry: +; VI-NEXT: [[ELT0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0 +; VI-NEXT: [[ELT1:%.*]] = extractelement <4 x i16> [[A]], i64 1 +; VI-NEXT: [[ELT2:%.*]] = extractelement <4 x i16> [[A]], i64 2 +; VI-NEXT: [[ELT3:%.*]] = extractelement <4 x i16> [[A]], i64 3 +; VI-NEXT: [[ADD1:%.*]] = add i16 [[ELT1]], [[ELT0]] +; VI-NEXT: [[ADD2:%.*]] = add i16 [[ELT2]], [[ADD1]] +; VI-NEXT: [[ADD3:%.*]] = add i16 [[ELT3]], [[ADD2]] +; VI-NEXT: ret i16 [[ADD3]] ; entry: %elt0 = extractelement <4 x i16> %a, i64 0 @@ -145,10 +213,29 @@ entry: } define i16 @reduction_v8i16(<8 x i16> %vec8) { -; GCN-LABEL: @reduction_v8i16( -; GCN-NEXT: entry: -; GCN-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[VEC8:%.*]]) -; GCN-NEXT: ret i16 [[TMP0]] +; GFX9-LABEL: @reduction_v8i16( +; GFX9-NEXT: entry: +; GFX9-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[VEC8:%.*]]) +; GFX9-NEXT: ret i16 [[TMP0]] +; +; VI-LABEL: @reduction_v8i16( +; VI-NEXT: entry: +; VI-NEXT: [[ELT0:%.*]] = extractelement <8 x i16> [[VEC8:%.*]], i64 0 +; VI-NEXT: [[ELT1:%.*]] = extractelement <8 x i16> [[VEC8]], i64 1 +; VI-NEXT: [[ELT2:%.*]] = extractelement <8 x i16> [[VEC8]], i64 2 +; VI-NEXT: [[ELT3:%.*]] = extractelement <8 x i16> [[VEC8]], i64 3 +; VI-NEXT: [[ELT4:%.*]] = extractelement <8 x i16> [[VEC8]], i64 4 +; VI-NEXT: [[ELT5:%.*]] = extractelement <8 x i16> [[VEC8]], i64 5 +; VI-NEXT: [[ELT6:%.*]] = extractelement <8 x i16> [[VEC8]], i64 6 +; VI-NEXT: [[ELT7:%.*]] = extractelement <8 x i16> [[VEC8]], i64 7 +; VI-NEXT: [[ADD1:%.*]] = add i16 [[ELT1]], [[ELT0]] +; VI-NEXT: [[ADD2:%.*]] = add i16 [[ELT2]], [[ADD1]] +; VI-NEXT: [[ADD3:%.*]] = add i16 [[ELT3]], [[ADD2]] +; VI-NEXT: [[ADD4:%.*]] = add i16 [[ELT4]], [[ADD3]] +; VI-NEXT: [[ADD5:%.*]] = add i16 [[ELT5]], [[ADD4]] +; VI-NEXT: [[ADD6:%.*]] = add i16 [[ELT6]], [[ADD5]] +; VI-NEXT: [[ADD7:%.*]] = add i16 [[ELT7]], [[ADD6]] +; VI-NEXT: ret i16 [[ADD7]] ; entry: %elt0 = extractelement <8 x i16> %vec8, i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/slp-v2f16.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/slp-v2f16.ll index c9a013bd58322..d8d4460accf7c 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/slp-v2f16.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/slp-v2f16.ll @@ -434,34 +434,18 @@ bb: } define void @copysign_combine_v2f16(ptr addrspace(1) %arg, half %sign) { -; GFX8-LABEL: define void @copysign_combine_v2f16( -; GFX8-SAME: ptr addrspace(1) [[ARG:%.*]], half [[SIGN:%.*]]) #[[ATTR0]] { -; GFX8-NEXT: [[BB:.*:]] -; GFX8-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() -; GFX8-NEXT: [[ITMP1:%.*]] = zext i32 [[TMP]] to i64 -; GFX8-NEXT: [[ITMP2:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[ARG]], i64 [[ITMP1]] -; GFX8-NEXT: [[ITMP3:%.*]] = load half, ptr addrspace(1) [[ITMP2]], align 2 -; GFX8-NEXT: [[ITMP4:%.*]] = call half @llvm.copysign.f16(half [[ITMP3]], half [[SIGN]]) -; GFX8-NEXT: store half [[ITMP4]], ptr addrspace(1) [[ITMP2]], align 2 -; GFX8-NEXT: [[ITMP5:%.*]] = add nuw nsw i64 [[ITMP1]], 1 -; GFX8-NEXT: [[ITMP6:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[ARG]], i64 [[ITMP5]] -; GFX8-NEXT: [[ITMP7:%.*]] = load half, ptr addrspace(1) [[ITMP6]], align 2 -; GFX8-NEXT: [[ITMP8:%.*]] = call half @llvm.copysign.f16(half [[ITMP7]], half [[SIGN]]) -; GFX8-NEXT: store half [[ITMP8]], ptr addrspace(1) [[ITMP6]], align 2 -; GFX8-NEXT: ret void -; -; GFX9-LABEL: define void @copysign_combine_v2f16( -; GFX9-SAME: ptr addrspace(1) [[ARG:%.*]], half [[SIGN:%.*]]) #[[ATTR0]] { -; GFX9-NEXT: [[BB:.*:]] -; GFX9-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() -; GFX9-NEXT: [[ITMP1:%.*]] = zext i32 [[TMP]] to i64 -; GFX9-NEXT: [[ITMP2:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[ARG]], i64 [[ITMP1]] -; GFX9-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr addrspace(1) [[ITMP2]], align 2 -; GFX9-NEXT: [[TMP1:%.*]] = insertelement <2 x half> poison, half [[SIGN]], i32 0 -; GFX9-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[TMP1]], <2 x half> poison, <2 x i32> zeroinitializer -; GFX9-NEXT: [[TMP3:%.*]] = call <2 x half> @llvm.copysign.v2f16(<2 x half> [[TMP0]], <2 x half> [[TMP2]]) -; GFX9-NEXT: store <2 x half> [[TMP3]], ptr addrspace(1) [[ITMP2]], align 2 -; GFX9-NEXT: ret void +; GCN-LABEL: define void @copysign_combine_v2f16( +; GCN-SAME: ptr addrspace(1) [[ARG:%.*]], half [[SIGN:%.*]]) #[[ATTR0]] { +; GCN-NEXT: [[BB:.*:]] +; GCN-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; GCN-NEXT: [[ITMP1:%.*]] = zext i32 [[TMP]] to i64 +; GCN-NEXT: [[ITMP2:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[ARG]], i64 [[ITMP1]] +; GCN-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr addrspace(1) [[ITMP2]], align 2 +; GCN-NEXT: [[TMP1:%.*]] = insertelement <2 x half> poison, half [[SIGN]], i32 0 +; GCN-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[TMP1]], <2 x half> poison, <2 x i32> zeroinitializer +; GCN-NEXT: [[TMP3:%.*]] = call <2 x half> @llvm.copysign.v2f16(<2 x half> [[TMP0]], <2 x half> [[TMP2]]) +; GCN-NEXT: store <2 x half> [[TMP3]], ptr addrspace(1) [[ITMP2]], align 2 +; GCN-NEXT: ret void ; bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -487,14 +471,13 @@ define void @copysign_combine_v4f16(ptr addrspace(1) %arg, half %sign) { ; GFX8-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() ; GFX8-NEXT: [[ITMP1:%.*]] = zext i32 [[TMP]] to i64 ; GFX8-NEXT: [[ITMP2:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[ARG]], i64 [[ITMP1]] -; GFX8-NEXT: [[ITMP3:%.*]] = load half, ptr addrspace(1) [[ITMP2]], align 2 -; GFX8-NEXT: [[ITMP4:%.*]] = call half @llvm.copysign.f16(half [[ITMP3]], half [[SIGN]]) -; GFX8-NEXT: store half [[ITMP4]], ptr addrspace(1) [[ITMP2]], align 2 ; GFX8-NEXT: [[ITMP5:%.*]] = add nuw nsw i64 [[ITMP1]], 1 ; GFX8-NEXT: [[ITMP6:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[ARG]], i64 [[ITMP5]] -; GFX8-NEXT: [[ITMP7:%.*]] = load half, ptr addrspace(1) [[ITMP6]], align 2 -; GFX8-NEXT: [[ITMP8:%.*]] = call half @llvm.copysign.f16(half [[ITMP7]], half [[SIGN]]) -; GFX8-NEXT: store half [[ITMP8]], ptr addrspace(1) [[ITMP6]], align 2 +; GFX8-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr addrspace(1) [[ITMP2]], align 2 +; GFX8-NEXT: [[TMP1:%.*]] = insertelement <2 x half> poison, half [[SIGN]], i32 0 +; GFX8-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[TMP1]], <2 x half> poison, <2 x i32> zeroinitializer +; GFX8-NEXT: [[TMP3:%.*]] = call <2 x half> @llvm.copysign.v2f16(<2 x half> [[TMP0]], <2 x half> [[TMP2]]) +; GFX8-NEXT: store <2 x half> [[TMP3]], ptr addrspace(1) [[ITMP2]], align 2 ; GFX8-NEXT: [[ITMP9:%.*]] = add nuw nsw i64 [[ITMP1]], 2 ; GFX8-NEXT: [[ITMP10:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[ARG]], i64 [[ITMP9]] ; GFX8-NEXT: [[ITMP11:%.*]] = load half, ptr addrspace(1) [[ITMP6]], align 2 diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-chain-to-shuffles.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-chain-to-shuffles.ll index ca22e44d1c97f..b6ba6eb984c85 100644 --- a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-chain-to-shuffles.ll +++ b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-chain-to-shuffles.ll @@ -8,37 +8,7 @@ define amdgpu_kernel void @extract_insert_chain_to_shuffles(<16 x i8> %in, <16 x ; OPT-LABEL: define amdgpu_kernel void @extract_insert_chain_to_shuffles( ; OPT-SAME: <16 x i8> [[IN:%.*]], <16 x i8> [[ADD:%.*]], ptr addrspace(3) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] { ; OPT-NEXT: [[ENTRY:.*:]] -; OPT-NEXT: [[TMP226:%.*]] = extractelement <16 x i8> [[IN]], i64 1 -; OPT-NEXT: [[TMP228:%.*]] = extractelement <16 x i8> [[IN]], i64 2 -; OPT-NEXT: [[TMP230:%.*]] = extractelement <16 x i8> [[IN]], i64 3 -; OPT-NEXT: [[TMP232:%.*]] = extractelement <16 x i8> [[IN]], i64 4 -; OPT-NEXT: [[TMP234:%.*]] = extractelement <16 x i8> [[IN]], i64 5 -; OPT-NEXT: [[TMP236:%.*]] = extractelement <16 x i8> [[IN]], i64 6 -; OPT-NEXT: [[TMP238:%.*]] = extractelement <16 x i8> [[IN]], i64 7 -; OPT-NEXT: [[TMP240:%.*]] = extractelement <16 x i8> [[IN]], i64 8 -; OPT-NEXT: [[TMP242:%.*]] = extractelement <16 x i8> [[IN]], i64 9 -; OPT-NEXT: [[TMP244:%.*]] = extractelement <16 x i8> [[IN]], i64 10 -; OPT-NEXT: [[TMP246:%.*]] = extractelement <16 x i8> [[IN]], i64 11 -; OPT-NEXT: [[TMP248:%.*]] = extractelement <16 x i8> [[IN]], i64 12 -; OPT-NEXT: [[TMP250:%.*]] = extractelement <16 x i8> [[IN]], i64 13 -; OPT-NEXT: [[TMP252:%.*]] = extractelement <16 x i8> [[IN]], i64 14 -; OPT-NEXT: [[TMP256:%.*]] = shufflevector <16 x i8> [[IN]], <16 x i8> poison, <16 x i32> -; OPT-NEXT: [[I257:%.*]] = insertelement <16 x i8> [[TMP256]], i8 [[TMP226]], i64 1 -; OPT-NEXT: [[I258:%.*]] = insertelement <16 x i8> [[I257]], i8 [[TMP228]], i64 2 -; OPT-NEXT: [[I259:%.*]] = insertelement <16 x i8> [[I258]], i8 [[TMP230]], i64 3 -; OPT-NEXT: [[I260:%.*]] = insertelement <16 x i8> [[I259]], i8 [[TMP232]], i64 4 -; OPT-NEXT: [[I261:%.*]] = insertelement <16 x i8> [[I260]], i8 [[TMP234]], i64 5 -; OPT-NEXT: [[I262:%.*]] = insertelement <16 x i8> [[I261]], i8 [[TMP236]], i64 6 -; OPT-NEXT: [[I263:%.*]] = insertelement <16 x i8> [[I262]], i8 [[TMP238]], i64 7 -; OPT-NEXT: [[I264:%.*]] = insertelement <16 x i8> [[I263]], i8 [[TMP240]], i64 8 -; OPT-NEXT: [[I265:%.*]] = insertelement <16 x i8> [[I264]], i8 [[TMP242]], i64 9 -; OPT-NEXT: [[I266:%.*]] = insertelement <16 x i8> [[I265]], i8 [[TMP244]], i64 10 -; OPT-NEXT: [[I267:%.*]] = insertelement <16 x i8> [[I266]], i8 [[TMP246]], i64 11 -; OPT-NEXT: [[I268:%.*]] = insertelement <16 x i8> [[I267]], i8 [[TMP248]], i64 12 -; OPT-NEXT: [[I269:%.*]] = insertelement <16 x i8> [[I268]], i8 [[TMP250]], i64 13 -; OPT-NEXT: [[TMP270:%.*]] = insertelement <16 x i8> [[I269]], i8 [[TMP252]], i64 14 -; OPT-NEXT: [[TMP271:%.*]] = shufflevector <16 x i8> [[TMP270]], <16 x i8> [[IN]], <16 x i32> -; OPT-NEXT: [[SUM:%.*]] = add <16 x i8> [[TMP271]], [[ADD]] +; OPT-NEXT: [[SUM:%.*]] = add <16 x i8> [[IN]], [[ADD]] ; OPT-NEXT: store <16 x i8> [[SUM]], ptr addrspace(3) [[OUT]], align 16 ; OPT-NEXT: ret void ; diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll index f847a90e91e9e..7a415f4cb71d0 100644 --- a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll +++ b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll @@ -5,70 +5,39 @@ define <32 x i8> @extract_insert_chain(<8 x i8> %in0, <8 x i8> %in1, <8 x i8> %i ; OPT-LABEL: define <32 x i8> @extract_insert_chain( ; OPT-SAME: <8 x i8> [[IN0:%.*]], <8 x i8> [[IN1:%.*]], <8 x i8> [[IN2:%.*]], <8 x i8> [[IN3:%.*]]) #[[ATTR0:[0-9]+]] { ; OPT-NEXT: [[ENTRY:.*:]] -; OPT-NEXT: [[I_0_0:%.*]] = extractelement <8 x i8> [[IN0]], i64 0 -; OPT-NEXT: [[I_0_1:%.*]] = extractelement <8 x i8> [[IN0]], i64 1 -; OPT-NEXT: [[I_0_2:%.*]] = extractelement <8 x i8> [[IN0]], i64 2 -; OPT-NEXT: [[I_0_3:%.*]] = extractelement <8 x i8> [[IN0]], i64 3 -; OPT-NEXT: [[I_0_4:%.*]] = extractelement <8 x i8> [[IN0]], i64 4 -; OPT-NEXT: [[I_0_5:%.*]] = extractelement <8 x i8> [[IN0]], i64 5 -; OPT-NEXT: [[I_0_6:%.*]] = extractelement <8 x i8> [[IN0]], i64 6 -; OPT-NEXT: [[I_0_7:%.*]] = extractelement <8 x i8> [[IN0]], i64 7 -; OPT-NEXT: [[I_1_1:%.*]] = extractelement <8 x i8> [[IN1]], i64 1 -; OPT-NEXT: [[I_1_2:%.*]] = extractelement <8 x i8> [[IN1]], i64 2 -; OPT-NEXT: [[I_1_3:%.*]] = extractelement <8 x i8> [[IN1]], i64 3 -; OPT-NEXT: [[I_1_4:%.*]] = extractelement <8 x i8> [[IN1]], i64 4 -; OPT-NEXT: [[I_1_5:%.*]] = extractelement <8 x i8> [[IN1]], i64 5 -; OPT-NEXT: [[I_1_6:%.*]] = extractelement <8 x i8> [[IN1]], i64 6 -; OPT-NEXT: [[I_1_7:%.*]] = extractelement <8 x i8> [[IN1]], i64 7 -; OPT-NEXT: [[I_2_1:%.*]] = extractelement <8 x i8> [[IN2]], i64 1 -; OPT-NEXT: [[I_2_2:%.*]] = extractelement <8 x i8> [[IN2]], i64 2 -; OPT-NEXT: [[I_2_3:%.*]] = extractelement <8 x i8> [[IN2]], i64 3 -; OPT-NEXT: [[I_2_4:%.*]] = extractelement <8 x i8> [[IN2]], i64 4 -; OPT-NEXT: [[I_2_5:%.*]] = extractelement <8 x i8> [[IN2]], i64 5 -; OPT-NEXT: [[I_2_6:%.*]] = extractelement <8 x i8> [[IN2]], i64 6 -; OPT-NEXT: [[I_2_7:%.*]] = extractelement <8 x i8> [[IN2]], i64 7 -; OPT-NEXT: [[I_3_1:%.*]] = extractelement <8 x i8> [[IN3]], i64 1 -; OPT-NEXT: [[I_3_2:%.*]] = extractelement <8 x i8> [[IN3]], i64 2 -; OPT-NEXT: [[I_3_3:%.*]] = extractelement <8 x i8> [[IN3]], i64 3 -; OPT-NEXT: [[I_3_4:%.*]] = extractelement <8 x i8> [[IN3]], i64 4 -; OPT-NEXT: [[I_3_5:%.*]] = extractelement <8 x i8> [[IN3]], i64 5 -; OPT-NEXT: [[I_3_6:%.*]] = extractelement <8 x i8> [[IN3]], i64 6 -; OPT-NEXT: [[I_3_7:%.*]] = extractelement <8 x i8> [[IN3]], i64 7 -; OPT-NEXT: [[O_0_0:%.*]] = insertelement <32 x i8> poison, i8 [[I_0_0]], i32 0 -; OPT-NEXT: [[O_0_1:%.*]] = insertelement <32 x i8> [[O_0_0]], i8 [[I_0_1]], i32 1 -; OPT-NEXT: [[O_0_2:%.*]] = insertelement <32 x i8> [[O_0_1]], i8 [[I_0_2]], i32 2 -; OPT-NEXT: [[O_0_3:%.*]] = insertelement <32 x i8> [[O_0_2]], i8 [[I_0_3]], i32 3 -; OPT-NEXT: [[O_0_4:%.*]] = insertelement <32 x i8> [[O_0_3]], i8 [[I_0_4]], i32 4 -; OPT-NEXT: [[O_0_5:%.*]] = insertelement <32 x i8> [[O_0_4]], i8 [[I_0_5]], i32 5 -; OPT-NEXT: [[O_0_6:%.*]] = insertelement <32 x i8> [[O_0_5]], i8 [[I_0_6]], i32 6 -; OPT-NEXT: [[O_0_7:%.*]] = insertelement <32 x i8> [[O_0_6]], i8 [[I_0_7]], i32 7 -; OPT-NEXT: [[TMP0:%.*]] = shufflevector <8 x i8> [[IN1]], <8 x i8> poison, <32 x i32> -; OPT-NEXT: [[O_1_0:%.*]] = shufflevector <32 x i8> [[O_0_7]], <32 x i8> [[TMP0]], <32 x i32> -; OPT-NEXT: [[O_1_1:%.*]] = insertelement <32 x i8> [[O_1_0]], i8 [[I_1_1]], i32 9 -; OPT-NEXT: [[O_1_2:%.*]] = insertelement <32 x i8> [[O_1_1]], i8 [[I_1_2]], i32 10 -; OPT-NEXT: [[O_1_3:%.*]] = insertelement <32 x i8> [[O_1_2]], i8 [[I_1_3]], i32 11 -; OPT-NEXT: [[O_1_4:%.*]] = insertelement <32 x i8> [[O_1_3]], i8 [[I_1_4]], i32 12 -; OPT-NEXT: [[O_1_5:%.*]] = insertelement <32 x i8> [[O_1_4]], i8 [[I_1_5]], i32 13 -; OPT-NEXT: [[O_1_6:%.*]] = insertelement <32 x i8> [[O_1_5]], i8 [[I_1_6]], i32 14 -; OPT-NEXT: [[O_1_7:%.*]] = insertelement <32 x i8> [[O_1_6]], i8 [[I_1_7]], i32 15 +; OPT-NEXT: [[O_1_7:%.*]] = shufflevector <8 x i8> [[IN0]], <8 x i8> [[IN1]], <32 x i32> ; OPT-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[IN2]], <8 x i8> poison, <32 x i32> ; OPT-NEXT: [[O_2_0:%.*]] = shufflevector <32 x i8> [[O_1_7]], <32 x i8> [[TMP1]], <32 x i32> -; OPT-NEXT: [[O_2_1:%.*]] = insertelement <32 x i8> [[O_2_0]], i8 [[I_2_1]], i32 17 -; OPT-NEXT: [[O_2_2:%.*]] = insertelement <32 x i8> [[O_2_1]], i8 [[I_2_2]], i32 18 -; OPT-NEXT: [[O_2_3:%.*]] = insertelement <32 x i8> [[O_2_2]], i8 [[I_2_3]], i32 19 -; OPT-NEXT: [[O_2_4:%.*]] = insertelement <32 x i8> [[O_2_3]], i8 [[I_2_4]], i32 20 -; OPT-NEXT: [[O_2_5:%.*]] = insertelement <32 x i8> [[O_2_4]], i8 [[I_2_5]], i32 21 -; OPT-NEXT: [[O_2_6:%.*]] = insertelement <32 x i8> [[O_2_5]], i8 [[I_2_6]], i32 22 -; OPT-NEXT: [[O_2_7:%.*]] = insertelement <32 x i8> [[O_2_6]], i8 [[I_2_7]], i32 23 +; OPT-NEXT: [[TMP8:%.*]] = shufflevector <8 x i8> [[IN2]], <8 x i8> poison, <32 x i32> +; OPT-NEXT: [[O_2_1:%.*]] = shufflevector <32 x i8> [[O_2_0]], <32 x i8> [[TMP8]], <32 x i32> +; OPT-NEXT: [[TMP16:%.*]] = shufflevector <8 x i8> [[IN2]], <8 x i8> poison, <32 x i32> +; OPT-NEXT: [[O_2_2:%.*]] = shufflevector <32 x i8> [[O_2_1]], <32 x i8> [[TMP16]], <32 x i32> +; OPT-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[IN2]], <8 x i8> poison, <32 x i32> +; OPT-NEXT: [[O_2_3:%.*]] = shufflevector <32 x i8> [[O_2_2]], <32 x i8> [[TMP3]], <32 x i32> +; OPT-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[IN2]], <8 x i8> poison, <32 x i32> +; OPT-NEXT: [[O_2_4:%.*]] = shufflevector <32 x i8> [[O_2_3]], <32 x i8> [[TMP4]], <32 x i32> +; OPT-NEXT: [[TMP5:%.*]] = shufflevector <8 x i8> [[IN2]], <8 x i8> poison, <32 x i32> +; OPT-NEXT: [[O_2_5:%.*]] = shufflevector <32 x i8> [[O_2_4]], <32 x i8> [[TMP5]], <32 x i32> +; OPT-NEXT: [[TMP6:%.*]] = shufflevector <8 x i8> [[IN2]], <8 x i8> poison, <32 x i32> +; OPT-NEXT: [[O_2_6:%.*]] = shufflevector <32 x i8> [[O_2_5]], <32 x i8> [[TMP6]], <32 x i32> +; OPT-NEXT: [[TMP7:%.*]] = shufflevector <8 x i8> [[IN2]], <8 x i8> poison, <32 x i32> +; OPT-NEXT: [[O_2_7:%.*]] = shufflevector <32 x i8> [[O_2_6]], <32 x i8> [[TMP7]], <32 x i32> ; OPT-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[IN3]], <8 x i8> poison, <32 x i32> ; OPT-NEXT: [[O_3_0:%.*]] = shufflevector <32 x i8> [[O_2_7]], <32 x i8> [[TMP2]], <32 x i32> -; OPT-NEXT: [[O_3_1:%.*]] = insertelement <32 x i8> [[O_3_0]], i8 [[I_3_1]], i32 25 -; OPT-NEXT: [[O_3_2:%.*]] = insertelement <32 x i8> [[O_3_1]], i8 [[I_3_2]], i32 26 -; OPT-NEXT: [[O_3_3:%.*]] = insertelement <32 x i8> [[O_3_2]], i8 [[I_3_3]], i32 27 -; OPT-NEXT: [[O_3_4:%.*]] = insertelement <32 x i8> [[O_3_3]], i8 [[I_3_4]], i32 28 -; OPT-NEXT: [[O_3_5:%.*]] = insertelement <32 x i8> [[O_3_4]], i8 [[I_3_5]], i32 29 -; OPT-NEXT: [[O_3_6:%.*]] = insertelement <32 x i8> [[O_3_5]], i8 [[I_3_6]], i32 30 -; OPT-NEXT: [[O_3_7:%.*]] = insertelement <32 x i8> [[O_3_6]], i8 [[I_3_7]], i32 31 +; OPT-NEXT: [[TMP9:%.*]] = shufflevector <8 x i8> [[IN3]], <8 x i8> poison, <32 x i32> +; OPT-NEXT: [[O_3_1:%.*]] = shufflevector <32 x i8> [[O_3_0]], <32 x i8> [[TMP9]], <32 x i32> +; OPT-NEXT: [[TMP10:%.*]] = shufflevector <8 x i8> [[IN3]], <8 x i8> poison, <32 x i32> +; OPT-NEXT: [[O_3_2:%.*]] = shufflevector <32 x i8> [[O_3_1]], <32 x i8> [[TMP10]], <32 x i32> +; OPT-NEXT: [[TMP11:%.*]] = shufflevector <8 x i8> [[IN3]], <8 x i8> poison, <32 x i32> +; OPT-NEXT: [[O_3_3:%.*]] = shufflevector <32 x i8> [[O_3_2]], <32 x i8> [[TMP11]], <32 x i32> +; OPT-NEXT: [[TMP12:%.*]] = shufflevector <8 x i8> [[IN3]], <8 x i8> poison, <32 x i32> +; OPT-NEXT: [[O_3_4:%.*]] = shufflevector <32 x i8> [[O_3_3]], <32 x i8> [[TMP12]], <32 x i32> +; OPT-NEXT: [[TMP13:%.*]] = shufflevector <8 x i8> [[IN3]], <8 x i8> poison, <32 x i32> +; OPT-NEXT: [[O_3_5:%.*]] = shufflevector <32 x i8> [[O_3_4]], <32 x i8> [[TMP13]], <32 x i32> +; OPT-NEXT: [[TMP14:%.*]] = shufflevector <8 x i8> [[IN3]], <8 x i8> poison, <32 x i32> +; OPT-NEXT: [[O_3_6:%.*]] = shufflevector <32 x i8> [[O_3_5]], <32 x i8> [[TMP14]], <32 x i32> +; OPT-NEXT: [[TMP15:%.*]] = shufflevector <8 x i8> [[IN3]], <8 x i8> poison, <32 x i32> +; OPT-NEXT: [[O_3_7:%.*]] = shufflevector <32 x i8> [[O_3_6]], <32 x i8> [[TMP15]], <32 x i32> ; OPT-NEXT: ret <32 x i8> [[O_3_7]] ; entry: @@ -148,19 +117,19 @@ define <8 x i8> @extract_insert_chain_shortening(<32 x i8> %in) { ; OPT-LABEL: define <8 x i8> @extract_insert_chain_shortening( ; OPT-SAME: <32 x i8> [[IN:%.*]]) #[[ATTR0]] { ; OPT-NEXT: [[I_1:%.*]] = extractelement <32 x i8> [[IN]], i64 17 +; OPT-NEXT: [[I_2:%.*]] = extractelement <32 x i8> [[IN]], i64 18 ; OPT-NEXT: [[I_3:%.*]] = extractelement <32 x i8> [[IN]], i64 19 ; OPT-NEXT: [[I_5:%.*]] = extractelement <32 x i8> [[IN]], i64 21 +; OPT-NEXT: [[I_6:%.*]] = extractelement <32 x i8> [[IN]], i64 22 ; OPT-NEXT: [[I_7:%.*]] = extractelement <32 x i8> [[IN]], i64 23 ; OPT-NEXT: [[O_0:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison, <8 x i32> ; OPT-NEXT: [[O_1:%.*]] = insertelement <8 x i8> [[O_0]], i8 [[I_1]], i32 1 -; OPT-NEXT: [[TMP2:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison, <8 x i32> -; OPT-NEXT: [[O_2:%.*]] = shufflevector <8 x i8> [[O_1]], <8 x i8> [[TMP2]], <8 x i32> +; OPT-NEXT: [[O_2:%.*]] = insertelement <8 x i8> [[O_1]], i8 [[I_2]], i32 2 ; OPT-NEXT: [[O_3:%.*]] = insertelement <8 x i8> [[O_2]], i8 [[I_3]], i32 3 ; OPT-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison, <8 x i32> ; OPT-NEXT: [[O_4:%.*]] = shufflevector <8 x i8> [[O_3]], <8 x i8> [[TMP1]], <8 x i32> ; OPT-NEXT: [[O_5:%.*]] = insertelement <8 x i8> [[O_4]], i8 [[I_5]], i32 5 -; OPT-NEXT: [[TMP3:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison, <8 x i32> -; OPT-NEXT: [[O_6:%.*]] = shufflevector <8 x i8> [[O_5]], <8 x i8> [[TMP3]], <8 x i32> +; OPT-NEXT: [[O_6:%.*]] = insertelement <8 x i8> [[O_5]], i8 [[I_6]], i32 6 ; OPT-NEXT: [[O_7:%.*]] = insertelement <8 x i8> [[O_6]], i8 [[I_7]], i32 7 ; OPT-NEXT: ret <8 x i8> [[O_7]] ;