diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 8f46fdc2f7ca9..34ffac8ab1650 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -469,6 +469,22 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, return LT.first * getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind); case TTI::SK_InsertSubvector: + // If we're inserting a subvector of *exactly* m1 size at a sub-register + // boundary this is a subregister insert at worst and won't require the + // slideup. We require the subvec to to be exactly VLEN as otherwise + // we'd have to account for tail elements in the m1 container if any. + // TODO: Extend for aligned m2, m4 inserts + // TODO: Extend for scalable subvector types + if (std::pair SubLT = getTypeLegalizationCost(SubTp); + SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) { + const unsigned MinVLen = ST->getRealMinVLen(); + const unsigned MaxVLen = ST->getRealMaxVLen(); + if (MinVLen == MaxVLen && + SubLT.second.getScalarSizeInBits() * Index % MinVLen == 0 && + SubLT.second.getSizeInBits() == MinVLen) + return TTI::TCC_Free; + } + // Example sequence: // vsetivli zero, 4, e8, mf2, tu, ma (ignored) // vslideup.vi v8, v9, 2 diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-insert_subvector.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-insert_subvector.ll index a91d562b3f6f1..9b07e57752eec 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-insert_subvector.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-insert_subvector.ll @@ -527,7 +527,7 @@ define void @fixed_m1_in_m2_notail(<8 x i32> %src, <8 x i32> %passthru) vscale_r ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %5 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'fixed_m1_in_m2_notail' @@ -535,7 +535,7 @@ define void @fixed_m1_in_m2_notail(<8 x i32> %src, <8 x i32> %passthru) vscale_r ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> +; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %5 = shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32> ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; shufflevector <8 x i32> %src, <8 x i32> %passthru, <8 x i32>