diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 83796f20731ac..6e1e02f38113e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3938,43 +3938,55 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { GetSplitVector(N->getOperand(0), Lo, Hi); - uint64_t LoEltsMin = Lo.getValueType().getVectorMinNumElements(); - uint64_t IdxVal = Idx->getAsZExtVal(); + ElementCount LoElts = Lo.getValueType().getVectorElementCount(); + // Note: For scalable vectors, the index is scaled by vscale. + ElementCount IdxVal = + ElementCount::get(Idx->getAsZExtVal(), SubVT.isScalableVector()); + uint64_t IdxValMin = IdxVal.getKnownMinValue(); - unsigned NumResultElts = SubVT.getVectorMinNumElements(); + EVT SrcVT = N->getOperand(0).getValueType(); + ElementCount NumResultElts = SubVT.getVectorElementCount(); - if (IdxVal < LoEltsMin) { - // If the extracted elements are all in the low half, do a simple extract. - if (IdxVal + NumResultElts <= LoEltsMin) - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); + // If the extracted elements are all in the low half, do a simple extract. + if (ElementCount::isKnownLE(IdxVal + NumResultElts, LoElts)) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); + unsigned LoEltsMin = LoElts.getKnownMinValue(); + if (IdxValMin < LoEltsMin && SubVT.isFixedLengthVector() && + SrcVT.isFixedLengthVector()) { // Extracted subvector crosses vector split, so we need to blend the two // halves. // TODO: May be able to emit partial extract_subvector. SmallVector<SDValue, 8> Elts; - Elts.reserve(NumResultElts); + Elts.reserve(NumResultElts.getFixedValue()); - DAG.ExtractVectorElements(Lo, Elts, /*Start=*/IdxVal, - /*Count=*/LoEltsMin - IdxVal); + // This is not valid for scalable vectors. If SubVT is scalable, this is the + // same as unrolling a scalable dimension (invalid). 
If SrcVT is scalable, + // `Lo[LoEltsMin]` may not be the last element of `Lo`. + DAG.ExtractVectorElements(Lo, Elts, /*Start=*/IdxValMin, + /*Count=*/LoEltsMin - IdxValMin); DAG.ExtractVectorElements(Hi, Elts, /*Start=*/0, /*Count=*/SubVT.getVectorNumElements() - Elts.size()); return DAG.getBuildVector(SubVT, dl, Elts); } - EVT SrcVT = N->getOperand(0).getValueType(); if (SubVT.isScalableVector() == SrcVT.isScalableVector()) { - uint64_t ExtractIdx = IdxVal - LoEltsMin; - if (ExtractIdx % NumResultElts == 0) - return DAG.getExtractSubvector(dl, SubVT, Hi, ExtractIdx); + ElementCount ExtractIdx = IdxVal - LoElts; + if (ExtractIdx.isKnownMultipleOf(NumResultElts)) + return DAG.getExtractSubvector(dl, SubVT, Hi, + ExtractIdx.getKnownMinValue()); - // We cannot create an extract_subvector that isn't a multiple of the result - // size, which may go out of bounds for the last elements. Shuffle the - // desired elements down to 0 and do a simple 0 extract. EVT HiVT = Hi.getValueType(); + assert(HiVT.isFixedLengthVector() && + "Only fixed-vector extracts are supported in this case"); + + // We cannot create an extract_subvector that isn't a multiple of the + // result size, which may go out of bounds for the last elements. Shuffle + // the desired elements down to 0 and do a simple 0 extract. 
SmallVector<int, 8> Mask(HiVT.getVectorNumElements(), -1); - for (int I = 0; I != static_cast<int>(NumResultElts); ++I) - Mask[I] = ExtractIdx + I; + for (int I = 0; I != int(NumResultElts.getFixedValue()); ++I) + Mask[I] = int(ExtractIdx.getFixedValue()) + I; SDValue Shuffle = DAG.getVectorShuffle(HiVT, dl, Hi, DAG.getPOISON(HiVT), Mask); diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-extract-256-bits.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-extract-256-bits.ll new file mode 100644 index 0000000000000..71c9a941807a4 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-extract-256-bits.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256 < %s -o - | FileCheck %s + +; Note: This test case is reduced from: https://github.com/llvm/llvm-project/pull/166748#issuecomment-3600498185 + +define i32 @test_extract_v8i32_from_nxv8i32(<vscale x 8 x i32> %vec) nounwind { +; CHECK-LABEL: test_extract_v8i32_from_nxv8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-2 +; CHECK-NEXT: str z0, [sp] +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ldr z0, [sp] +; CHECK-NEXT: str z1, [sp, #1, mul vl] +; CHECK-NEXT: uaddv d0, p0, z0.s +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %1 = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv8i32(<vscale x 8 x i32> %vec, i64 0) + %2 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %1) + ret i32 %2 +}