diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 393cf2d973803..4dc2eb88c18bb 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1335,6 +1335,39 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, if (!Mask.empty()) { std::pair LT = getTypeLegalizationCost(SrcTy); + // Check for LD2/LD4 instructions, which are represented in llvm IR as + // deinterleaving-shuffle(load). The shuffle cost could potentially be + // free, but we model it with a cost of LT.first so that LD2/LD4 have a + // higher cost than just the load. + if (Args.size() >= 1 && isa(Args[0]) && + (LT.second.getScalarSizeInBits() == 8 || + LT.second.getScalarSizeInBits() == 16 || + LT.second.getScalarSizeInBits() == 32) && + LT.second.getSizeInBits() == 128 && + ((TLI->getMaxSupportedInterleaveFactor() >= 2 && + ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2)) || + (TLI->getMaxSupportedInterleaveFactor() == 4 && + ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 4)))) + return ST->getMVEVectorCostFactor(CostKind) * + std::max(1, LT.first / 4); + + // Check for ST2/ST4 instructions, which are represented in llvm IR as + // store(interleaving-shuffle). The shuffle cost could potentially be + // free, but we model it with a cost of LT.first so that ST2/ST4 have a + // higher cost than just the store. + if (CxtI && CxtI->hasOneUse() && isa(*CxtI->user_begin()) && + (LT.second.getScalarSizeInBits() == 8 || + LT.second.getScalarSizeInBits() == 16 || + LT.second.getScalarSizeInBits() == 32) && + LT.second.getSizeInBits() == 128 && + ((TLI->getMaxSupportedInterleaveFactor() >= 2 && + ShuffleVectorInst::isInterleaveMask( + Mask, 2, SrcTy->getElementCount().getKnownMinValue() * 2)) || + (TLI->getMaxSupportedInterleaveFactor() == 4 && + ShuffleVectorInst::isInterleaveMask( + Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2)))) + return ST->getMVEVectorCostFactor(CostKind) * LT.first; + if (LT.second.isVector() && Mask.size() <= LT.second.getVectorNumElements() && (isVREVMask(Mask, LT.second, 16) || isVREVMask(Mask, LT.second, 32) || diff --git a/llvm/test/Analysis/CostModel/ARM/mve-shuffle-loadstore.ll b/llvm/test/Analysis/CostModel/ARM/mve-shuffle-loadstore.ll index 6a327cfed4e4c..ef0b28ea2604a 100644 --- a/llvm/test/Analysis/CostModel/ARM/mve-shuffle-loadstore.ll +++ b/llvm/test/Analysis/CostModel/ARM/mve-shuffle-loadstore.ll @@ -7,41 +7,41 @@ define void @vld2(ptr %p) { ; CHECK-LABEL: 'vld2' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = load <4 x i8>, ptr %p, align 4 -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4i8_0 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4i8_1 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v4i8_0 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v4i8_1 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = load <8 x i8>, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v8i8_0 = shufflevector <8 x i8> %v8i8, <8 x i8> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v8i8_1 = shufflevector <8 x i8> %v8i8, <8 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v8i8_0 = shufflevector <8 x i8> %v8i8, <8 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v8i8_1 = shufflevector <8 x i8> %v8i8, <8 x i8> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = load <16 x i8>, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %v16i8_0 = shufflevector <16 x i8> %v16i8, <16 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %v16i8_1 = shufflevector <16 x i8> %v16i8, <16 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v16i8_0 = shufflevector <16 x i8> %v16i8, <16 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v16i8_1 = shufflevector <16 x i8> %v16i8, <16 x i8> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8 = load <32 x i8>, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %v32i8_0 = shufflevector <32 x i8> %v32i8, <32 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %v32i8_1 = shufflevector <32 x i8> %v32i8, <32 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %v32i8_0 = shufflevector <32 x i8> %v32i8, <32 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %v32i8_1 = shufflevector <32 x i8> %v32i8, <32 x i8> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = load <4 x i16>, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4i16_0 = shufflevector <4 x i16> %v4i16, <4 x i16> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4i16_1 = shufflevector <4 x i16> %v4i16, <4 x i16> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v4i16_0 = shufflevector <4 x i16> %v4i16, <4 x i16> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v4i16_1 = shufflevector <4 x i16> %v4i16, <4 x i16> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = load <8 x i16>, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v8i16_0 = shufflevector <8 x i16> %v8i16, <8 x i16> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v8i16_1 = shufflevector <8 x i16> %v8i16, <8 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v8i16_0 = shufflevector <8 x i16> %v8i16, <8 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v8i16_1 = shufflevector <8 x i16> %v8i16, <8 x i16> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = load <16 x i16>, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %v16i16_0 = shufflevector <16 x i16> %v16i16, <16 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %v16i16_1 = shufflevector <16 x i16> %v16i16, <16 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v16i16_0 = shufflevector <16 x i16> %v16i16, <16 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v16i16_1 = shufflevector <16 x i16> %v16i16, <16 x i16> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i16 = load <32 x i16>, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %v32i16_0 = shufflevector <32 x i16> %v32i16, <32 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %v32i16_1 = shufflevector <32 x i16> %v32i16, <32 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %v32i16_0 = shufflevector <32 x i16> %v32i16, <32 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %v32i16_1 = shufflevector <32 x i16> %v32i16, <32 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = load <4 x i32>, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4i32_0 = shufflevector <4 x i32> %v4i32, <4 x i32> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v4i32_1 = shufflevector <4 x i32> %v4i32, <4 x i32> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v4i32_0 = shufflevector <4 x i32> %v4i32, <4 x i32> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v4i32_1 = shufflevector <4 x i32> %v4i32, <4 x i32> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = load <8 x i32>, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v8i32_0 = shufflevector <8 x i32> %v8i32, <8 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v8i32_1 = shufflevector <8 x i32> %v8i32, <8 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v8i32_0 = shufflevector <8 x i32> %v8i32, <8 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v8i32_1 = shufflevector <8 x i32> %v8i32, <8 x i32> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i32 = load <16 x i32>, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %v16i32_0 = shufflevector <16 x i32> %v16i32, <16 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %v16i32_1 = shufflevector <16 x i32> %v16i32, <16 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v16i32_0 = shufflevector <16 x i32> %v16i32, <16 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v16i32_1 = shufflevector <16 x i32> %v16i32, <16 x i32> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i32 = load <32 x i32>, ptr %p, align 128 -; CHECK-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %v32i32_0 = shufflevector <32 x i32> %v32i32, <32 x i32> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %v32i32_1 = shufflevector <32 x i32> %v32i32, <32 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %v32i32_0 = shufflevector <32 x i32> %v32i32, <32 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %v32i32_1 = shufflevector <32 x i32> %v32i32, <32 x i32> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = load <4 x i64>, ptr %p, align 32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v2i64_0 = shufflevector <4 x i64> %v2i64, <4 x i64> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v2i64_1 = shufflevector <4 x i64> %v2i64, <4 x i64> undef, <2 x i32> @@ -252,88 +252,171 @@ define void @vld3(ptr %p) { } define void @vld4(ptr %p) { -; CHECK-LABEL: 'vld4' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = load <8 x i8>, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i8_0 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i8_1 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i8_2 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i8_3 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = load <16 x i8>, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i8_0 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i8_1 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i8_2 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i8_3 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8 = load <32 x i8>, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i8_0 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i8_1 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i8_2 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i8_3 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8 = load <64 x i8>, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i8_0 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i8_1 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i8_2 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i8_3 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = load <8 x i16>, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i16_0 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i16_1 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i16_2 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i16_3 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = load <16 x i16>, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i16_0 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i16_1 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i16_2 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i16_3 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = load <32 x i16>, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i16_0 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i16_1 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i16_2 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i16_3 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = load <64 x i16>, ptr %p, align 128 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i16_0 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i16_1 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i16_2 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i16_3 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i32 = load <8 x i32>, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i32_0 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i32_1 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i32_2 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i32_3 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i32 = load <16 x i32>, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i32_0 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i32_1 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i32_2 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i32_3 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i32 = load <32 x i32>, ptr %p, align 128 -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i32_0 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i32_1 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i32_2 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i32_3 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i32 = load <64 x i32>, ptr %p, align 256 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i32_0 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i32_1 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i32_2 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i32_3 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = load <8 x i64>, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_0 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_1 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_2 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_3 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = load <16 x i64>, ptr %p, align 128 -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_0 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_1 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_2 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_3 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = load <32 x i64>, ptr %p, align 256 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_0 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_1 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_2 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_3 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16i64 = load <64 x i64>, ptr %p, align 512 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_0 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_1 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_2 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_3 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-UF2-LABEL: 'vld4' +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = load <8 x i8>, ptr %p, align 8 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i8_0 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i8_1 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i8_2 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i8_3 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = load <16 x i8>, ptr %p, align 8 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i8_0 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i8_1 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i8_2 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i8_3 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8 = load <32 x i8>, ptr %p, align 32 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i8_0 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i8_1 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i8_2 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i8_3 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8 = load <64 x i8>, ptr %p, align 64 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i8_0 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i8_1 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i8_2 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i8_3 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = load <8 x i16>, ptr %p, align 8 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i16_0 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i16_1 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i16_2 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i16_3 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = load <16 x i16>, ptr %p, align 32 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i16_0 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i16_1 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i16_2 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i16_3 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = load <32 x i16>, ptr %p, align 64 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i16_0 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i16_1 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i16_2 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i16_3 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = load <64 x i16>, ptr %p, align 128 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i16_0 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i16_1 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i16_2 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i16_3 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i32 = load <8 x i32>, ptr %p, align 32 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i32_0 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i32_1 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i32_2 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v2i32_3 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i32 = load <16 x i32>, ptr %p, align 64 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i32_0 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i32_1 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i32_2 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v4i32_3 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i32 = load <32 x i32>, ptr %p, align 128 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i32_0 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i32_1 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i32_2 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v8i32_3 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i32 = load <64 x i32>, ptr %p, align 256 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i32_0 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i32_1 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i32_2 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v16i32_3 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = load <8 x i64>, ptr %p, align 64 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_0 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_1 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_2 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_3 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = load <16 x i64>, ptr %p, align 128 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_0 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_1 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_2 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_3 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = load <32 x i64>, ptr %p, align 256 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_0 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_1 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_2 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_3 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16i64 = load <64 x i64>, ptr %p, align 512 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_0 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_1 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_2 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_3 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-UF4-LABEL: 'vld4' +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = load <8 x i8>, ptr %p, align 8 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i8_0 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i8_1 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i8_2 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i8_3 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = load <16 x i8>, ptr %p, align 8 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i8_0 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i8_1 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i8_2 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i8_3 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i8 = load <32 x i8>, ptr %p, align 32 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v8i8_0 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v8i8_1 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v8i8_2 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v8i8_3 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8 = load <64 x i8>, ptr %p, align 64 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %v16i8_0 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %v16i8_1 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %v16i8_2 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %v16i8_3 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = load <8 x i16>, ptr %p, align 8 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i16_0 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i16_1 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i16_2 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i16_3 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = load <16 x i16>, ptr %p, align 32 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i16_0 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i16_1 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i16_2 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i16_3 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = load <32 x i16>, ptr %p, align 64 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v8i16_0 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v8i16_1 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v8i16_2 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v8i16_3 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = load <64 x i16>, ptr %p, align 128 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %v16i16_0 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %v16i16_1 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %v16i16_2 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %v16i16_3 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i32 = load <8 x i32>, ptr %p, align 32 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i32_0 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i32_1 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i32_2 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v2i32_3 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i32 = load <16 x i32>, ptr %p, align 64 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i32_0 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i32_1 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i32_2 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v4i32_3 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i32 = load <32 x i32>, ptr %p, align 128 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %v8i32_0 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %v8i32_1 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %v8i32_2 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %v8i32_3 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i32 = load <64 x i32>, ptr %p, align 256 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %v16i32_0 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %v16i32_1 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %v16i32_2 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %v16i32_3 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = load <8 x i64>, ptr %p, align 64 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_0 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_1 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_2 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v2i64_3 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = load <16 x i64>, ptr %p, align 128 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_0 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_1 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_2 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %v4i64_3 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = load <32 x i64>, ptr %p, align 256 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_0 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_1 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_2 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %v8i64_3 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16i64 = load <64 x i64>, ptr %p, align 512 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_0 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_1 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_2 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2560 for instruction: %v16i64_3 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v2i8 = load <8 x i8>, ptr %p %v2i8_0 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> @@ -424,29 +507,29 @@ define void @vld4(ptr %p) { define void @vst2(ptr %p) { ; CHECK-LABEL: 'vst2' -; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %v4i8, ptr %p, align 4 -; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8i8, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16i8, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v32i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i8> %v32i8, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i16> %v4i16, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v8i16, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i16> %v16i16, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <32 x i16> %v32i16, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> %v4i32, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i32> %v8i32, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <16 x i32> %v16i32, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v32i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <32 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <32 x i32> %v32i32, ptr %p, align 128 ; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i64> %v4i64, ptr %p, align 32 @@ -575,40 +658,75 @@ define void @vst3(ptr %p) { define void @vst4(ptr %p) { -; CHECK-LABEL: 'vst4' -; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8i8, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16i8, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i8> %v32i8, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v64i8 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <64 x i8> %v64i8, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v8i16, ptr %p, align 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i16> %v16i16, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v32i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <32 x i16> %v32i16, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v64i16 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <64 x i16> %v64i16, ptr %p, align 128 -; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i32> %v8i32, ptr %p, align 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <16 x i32> %v16i32, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v32i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <32 x i32> %v32i32, ptr %p, align 128 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v64i32 = shufflevector <64 x i32> undef, <64 x i32> undef, <64 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <64 x i32> %v64i32, ptr %p, align 256 -; CHECK-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i64> %v8i64, ptr %p, align 64 -; CHECK-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i64> %v16i64, ptr %p, align 128 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v32i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <32 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <32 x i64> %v32i64, ptr %p, align 256 -; CHECK-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %v64i64 = shufflevector <64 x i64> undef, <64 x i64> undef, <64 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: store <64 x i64> %v64i64, ptr %p, align 512 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-UF2-LABEL: 'vst4' +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8i8, ptr %p, align 8 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16i8, ptr %p, align 8 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i8> %v32i8, ptr %p, align 32 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v64i8 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <64 x i8> %v64i8, ptr %p, align 64 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v8i16, ptr %p, align 8 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i16> %v16i16, ptr %p, align 32 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v32i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <32 x i16> %v32i16, ptr %p, align 64 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v64i16 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <64 x i16> %v64i16, ptr %p, align 128 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i32> %v8i32, ptr %p, align 32 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <16 x i32> %v16i32, ptr %p, align 64 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v32i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <32 x i32> %v32i32, ptr %p, align 128 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v64i32 = shufflevector <64 x i32> undef, <64 x i32> undef, <64 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <64 x i32> %v64i32, ptr %p, align 256 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i64> %v8i64, ptr %p, align 64 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i64> %v16i64, ptr %p, align 128 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v32i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <32 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <32 x i64> %v32i64, ptr %p, align 256 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %v64i64 = shufflevector <64 x i64> undef, <64 x i64> undef, <64 x i32> +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: store <64 x i64> %v64i64, ptr %p, align 512 +; CHECK-UF2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-UF4-LABEL: 'vst4' +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8i8, ptr %p, align 8 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16i8, ptr %p, align 8 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i8> %v32i8, ptr %p, align 32 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i8 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <64 x i8> %v64i8, ptr %p, align 64 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v8i16, ptr %p, align 8 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i16> %v16i16, ptr %p, align 32 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <32 x i16> %v32i16, ptr %p, align 64 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i16 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <64 x i16> %v64i16, ptr %p, align 128 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i32> %v8i32, ptr %p, align 32 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <16 x i32> %v16i32, ptr %p, align 64 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <32 x i32> %v32i32, ptr %p, align 128 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v64i32 = shufflevector <64 x i32> undef, <64 x i32> undef, <64 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <64 x i32> %v64i32, ptr %p, align 256 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i64> %v8i64, ptr %p, align 64 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i64> %v16i64, ptr %p, align 128 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v32i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <32 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <32 x i64> %v32i64, ptr %p, align 256 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %v64i64 = shufflevector <64 x i64> undef, <64 x i64> undef, <64 x i32> +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 64 for instruction: store <64 x i64> %v64i64, ptr %p, align 512 +; CHECK-UF4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> store <8 x i8> %v8i8, ptr %p @@ -648,6 +766,3 @@ define void @vst4(ptr %p) { ret void } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK-UF2: {{.*}} -; CHECK-UF4: {{.*}}