[SLP]Relax assertion to check if the input scalars were extended to

match the size of base node (PR63668). Need to adjust the check for assert and take into account case where the original scalars are reused and were extended to match the vector factor of the reused SLP node.
llvm · Jul 14, 2023 · 8ab962e · 8ab962e
1 parent 5475441
commit 8ab962e
Show file tree

Hide file tree

Showing 2 changed files with 61 additions and 2 deletions.
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -952,8 +952,13 @@ static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask,
                     bool ExtendingManyInputs = false) {
   if (SubMask.empty())
     return;
-  assert((!ExtendingManyInputs || SubMask.size() > Mask.size()) &&
-         "SubMask with many inputs support must be larger than the mask.");
+  assert(
+      (!ExtendingManyInputs || SubMask.size() > Mask.size() ||
+       // Check if input scalars were extended to match the size of other node.
+       (SubMask.size() == Mask.size() &&
+        std::all_of(std::next(Mask.begin(), Mask.size() / 2), Mask.end(),
+                    [](int Idx) { return Idx == PoisonMaskElem; }))) &&
+      "SubMask with many inputs support must be larger than the mask.");
   if (Mask.empty()) {
     Mask.append(SubMask.begin(), SubMask.end());
     return;

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=znver4 -S < %s | FileCheck %s
+
+define internal i32 @testfunc() {
+; CHECK-LABEL: define internal i32 @testfunc
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    br label [[TMP1:%.*]]
+; CHECK:       1:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi float [ 0.000000e+00, [[TMP0:%.*]] ], [ 0.000000e+00, [[TMP8:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi float [ 0.000000e+00, [[TMP0]] ], [ 0.000000e+00, [[TMP8]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi float [ 0.000000e+00, [[TMP0]] ], [ 0.000000e+00, [[TMP8]] ]
+; CHECK-NEXT:    br i1 false, label [[TMP8]], label [[TMP5:%.*]]
+; CHECK:       5:
+; CHECK-NEXT:    br i1 false, label [[TMP6:%.*]], label [[TMP8]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x float> zeroinitializer, i64 0
+; CHECK-NEXT:    br label [[TMP8]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = phi float [ [[TMP2]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ [[TMP2]], [[TMP5]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = phi float [ [[TMP7]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = phi float [ [[TMP3]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ]
+; CHECK-NEXT:    [[TMP15:%.*]] = phi float [ [[TMP4]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ 0.000000e+00, [[TMP5]] ]
+; CHECK-NEXT:    [[TMP16:%.*]] = phi float [ [[TMP4]], [[TMP6]] ], [ 0.000000e+00, [[TMP1]] ], [ [[TMP3]], [[TMP5]] ]
+; CHECK-NEXT:    br label [[TMP1]]
+;
+  br label %1
+
+1:                                                ; preds = %8, %0
+  %2 = phi float [ 0.000000e+00, %0 ], [ 0.000000e+00, %8 ]
+  %3 = phi float [ 0.000000e+00, %0 ], [ 0.000000e+00, %8 ]
+  %4 = phi float [ 0.000000e+00, %0 ], [ 0.000000e+00, %8 ]
+  br i1 false, label %8, label %5
+
+5:                                                ; preds = %1
+  br i1 false, label %6, label %8
+
+6:                                                ; preds = %5
+  %7 = extractelement <8 x float> zeroinitializer, i64 0
+  br label %8
+
+8:                                                ; preds = %6, %5, %1
+  %9 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ]
+  %10 = phi float [ %2, %6 ], [ 0.000000e+00, %1 ], [ %2, %5 ]
+  %11 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ]
+  %12 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ]
+  %13 = phi float [ %7, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ]
+  %14 = phi float [ %3, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ]
+  %15 = phi float [ %4, %6 ], [ 0.000000e+00, %1 ], [ 0.000000e+00, %5 ]
+  %16 = phi float [ %4, %6 ], [ 0.000000e+00, %1 ], [ %3, %5 ]
+  br label %1
+}