diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 97b8737df83e4..a5a9f00f29362 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -952,8 +952,13 @@ static void addMask(SmallVectorImpl &Mask, ArrayRef SubMask, bool ExtendingManyInputs = false) { if (SubMask.empty()) return; - assert((!ExtendingManyInputs || SubMask.size() > Mask.size()) && - "SubMask with many inputs support must be larger than the mask."); + assert( + (!ExtendingManyInputs || SubMask.size() > Mask.size() || + // Check if input scalars were extended to match the size of other node. + (SubMask.size() == Mask.size() && + std::all_of(std::next(Mask.begin(), Mask.size() / 2), Mask.end(), + [](int Idx) { return Idx == PoisonMaskElem; }))) && + "SubMask with many inputs support must be larger than the mask."); if (Mask.empty()) { Mask.append(SubMask.begin(), SubMask.end()); return; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll new file mode 100644 index 0000000000000..391771e06cab8 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=znver4 -S < %s | FileCheck %s + +define internal i32 @testfunc() { +; CHECK-LABEL: define internal i32 @testfunc +; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: br label [[TMP1:%.*]] +; CHECK: 1: +; CHECK-NEXT: [[TMP2:%.*]] = phi float [ 0.000000e+00, [[TMP0:%.*]] ], [ 0.000000e+00, [[TMP8:%.*]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi float [ 0.000000e+00, [[TMP0]] ], [ 0.000000e+00, [[TMP8]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi float [ 0.000000e+00, [[TMP0]] ], [ 0.000000e+00, [[TMP8]] ] +; CHECK-NEXT: br i1 false, label [[TMP8]], label [[TMP5:%.*]] +; CHECK: 5: +; CHECK-NEXT: br i1 false, label [[TMP6:%.*]], label [[TMP8]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x float> zeroinitializer, i64 0 +; CHECK-NEXT: br label [[TMP8]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ] +; CHECK-NEXT: [[TMP10:%.*]] = phi float [ [[TMP2]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ [[TMP2]], [[TMP5]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ] +; CHECK-NEXT: [[TMP12:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi float [ [[TMP3]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi float [ [[TMP4]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi float [ [[TMP4]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ [[TMP3]], [[TMP5]] ] +; CHECK-NEXT: br label [[TMP1]] +; + br label %1 + +1: ; preds = %8, %0 + %2 = phi float [ 0.000000e+00, %0 ], [ 0.000000e+00, %8 ] + %3 = phi float [ 0.000000e+00, %0 ], [ 0.000000e+00, %8 ] + %4 = phi float [ 0.000000e+00, %0 ], [ 0.000000e+00, %8 ] + br i1 false, label %8, label %5 + +5: ; preds = %1 + br i1 false, label %6, label %8 + +6: ; preds = %5 + %7 = extractelement <8 x float> zeroinitializer, i64 0 + br label %8 + +8: ; preds = %6, %5, %1 + %9 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ] + %10 = phi float [ %2, %6 ], [ 0.000000e+00, %1 ], [ %2, %5 ] + %11 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ] + %12 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ] + %13 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ] + %14 = phi float [ %3, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ] + %15 = phi float [ %4, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ] + %16 = phi float [ %4, %6 ], [ 0.000000e+00, %1 ], [ %3, %5 ] + br label %1 +}