Skip to content

Commit

Permalink
[VectorCombine] Add special handling for truncating shuffles (#70013)
Browse files Browse the repository at this point in the history
When dealing with a truncating shuffle, we can end up in a situation
where the type passed to getShuffleCost is the type of the result of the
shuffle, and the mask references an element which is out of bounds of
the result vector.

If dealing with truncating shuffles, pass the type of the input vectors
to `getShuffleCost()` in order to avoid an out-of-bounds assertion.
  • Loading branch information
omern1 committed Oct 24, 2023
1 parent 254558a commit 8e31acf
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 4 deletions.
15 changes: 11 additions & 4 deletions llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Expand Up @@ -1472,21 +1472,28 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
dyn_cast<FixedVectorType>(Shuffle->getOperand(0)->getType());
if (!ShuffleInputType)
return false;
int NumInputElts = ShuffleInputType->getNumElements();
unsigned NumInputElts = ShuffleInputType->getNumElements();

// Find the mask from sorting the lanes into order. This is most likely to
// become an identity or concat mask. Undef elements are pushed to the end.
SmallVector<int> ConcatMask;
Shuffle->getShuffleMask(ConcatMask);
sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
// In the case of a truncating shuffle it's possible for the mask
// to have an index greater than the size of the resulting vector.
// This requires special handling.
bool IsTruncatingShuffle = VecType->getNumElements() < NumInputElts;
bool UsesSecondVec =
any_of(ConcatMask, [&](int M) { return M >= NumInputElts; });
any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });

FixedVectorType *VecTyForCost =
(UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType;
InstructionCost OldCost = TTI.getShuffleCost(
UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
UsesSecondVec ? VecType : ShuffleInputType, Shuffle->getShuffleMask());
VecTyForCost, Shuffle->getShuffleMask());
InstructionCost NewCost = TTI.getShuffleCost(
UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
UsesSecondVec ? VecType : ShuffleInputType, ConcatMask);
VecTyForCost, ConcatMask);

LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
<< "\n");
Expand Down
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S --passes=vector-combine -mtriple=x86_64-unknown-linux < %s | FileCheck %s
; RUN: opt -S --passes=vector-combine -mtriple=x86_64-sie-ps5 < %s | FileCheck %s

define i16 @test_spill_mixed() {
; CHECK-LABEL: define i16 @test_spill_mixed() {
Expand All @@ -14,4 +15,17 @@ entry:
ret i16 0
}

; Regression test for a truncating shuffle that feeds a vector reduction.
; The shuffle takes two <8 x i32> inputs and produces a <4 x i32> result, and
; its mask contains index 9, which selects from the second input and is larger
; than the number of elements in the result type. This shape previously tripped
; an out-of-bounds assertion when vector-combine costed the shuffle.
; The checks expect the shuffle and the reduction to be left unchanged.
define i16 @crash() {
; CHECK-LABEL: define i16 @crash() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i32> zeroinitializer, <8 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 9>
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP0]])
; CHECK-NEXT: ret i16 0
;
entry:
; Truncating shuffle: 8-element inputs, 4-element result, mask index 9 refers
; to the second operand.
%0 = shufflevector <8 x i32> zeroinitializer, <8 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 9>
; The add-reduction consumes the shuffle result; its value is unused and the
; function returns a constant.
%1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %0)
ret i16 0
}

declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)

0 comments on commit 8e31acf

Please sign in to comment.