Skip to content

Commit

Permalink
[SLP]Fix PR59230: Use actual vector factor when sorting entries.
Browse files Browse the repository at this point in the history
When sorting entries in an attempt to reorder scalars, we need to use the
actual vectorization factor, not the number of scalars. Otherwise, the
compiler crashes if the scalars have to be reordered.

Differential Revision: https://reviews.llvm.org/D138819
  • Loading branch information
alexey-bataev committed Nov 29, 2022
1 parent 1fd4d91 commit 0cc1505
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 3 deletions.
6 changes: 3 additions & 3 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Expand Up @@ -4083,7 +4083,7 @@ void BoUpSLP::reorderTopToBottom() {
SmallVector<OrdersType, 1> ExternalUserReorderIndices =
findExternalStoreUsersReorderIndices(TE.get());
if (!ExternalUserReorderIndices.empty()) {
VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get());
ExternalUserReorderMap.try_emplace(TE.get(),
std::move(ExternalUserReorderIndices));
}
Expand All @@ -4103,7 +4103,7 @@ void BoUpSLP::reorderTopToBottom() {
OpcodeMask.set(Lane);
// If this pattern is supported by the target then we consider the order.
if (TTIRef.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) {
VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get());
AltShufflesToOrders.try_emplace(TE.get(), OrdersType());
}
// TODO: Check the reverse order too.
Expand Down Expand Up @@ -4141,7 +4141,7 @@ void BoUpSLP::reorderTopToBottom() {
});

// Reorder the graph nodes according to their vectorization factor.
for (unsigned VF = VectorizableTree.front()->Scalars.size(); VF > 1;
for (unsigned VF = VectorizableTree.front()->getVectorFactor(); VF > 1;
VF /= 2) {
auto It = VFToOrderedEntries.find(VF);
if (It == VFToOrderedEntries.end())
Expand Down
43 changes: 43 additions & 0 deletions llvm/test/Transforms/SLPVectorizer/X86/reorder-vf-to-resize.ll
@@ -0,0 +1,43 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -mcpu=skx -mtriple=x86_64-unknown-linux-gnu -S < %s | FileCheck %s

; Regression test for PR59230 (SLP vectorizer, reordering of tree entries).
; The function reads the two fields of a { double, double } through %0,
; derives one fadd and one fsub value from them, and feeds those into four
; fmul/fabs/fcmp chains whose results are OR-reduced.
define void @main(ptr %0) {
; CHECK-LABEL: @main(
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[TMP0:%.*]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> zeroinitializer, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> zeroinitializer, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP4]], <4 x i32> <i32 1, i32 2, i32 1, i32 2>
; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = fcmp oeq <4 x double> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double 0.000000e+00, double 0.000000e+00
; CHECK-NEXT: store double [[TMP10]], ptr null, align 8
; CHECK-NEXT: ret void
;
; Load field 0 and field 1 of the { double, double } pointed to by %0.
%.unpack = load double, ptr %0, align 8
%.elt1 = getelementptr { double, double }, ptr %0, i64 0, i32 1
%.unpack2 = load double, ptr %.elt1, align 8
; Two distinct source values: %2 = field0 + 0.0 and %3 = 0.0 - field1.
%2 = fadd double %.unpack, 0.000000e+00
%3 = fsub double 0.000000e+00, %.unpack2
; Four fmul-by-zero/fabs chains; their inputs are, in order, %2, %3, %3, %2,
; so only two unique values feed the four lanes.
%4 = fmul double %2, 0.000000e+00
%5 = call double @llvm.fabs.f64(double %4)
%6 = fmul double %3, 0.000000e+00
%7 = call double @llvm.fabs.f64(double %6)
%8 = fmul double %3, 0.000000e+00
%9 = call double @llvm.fabs.f64(double %8)
%10 = fmul double %2, 0.000000e+00
%11 = call double @llvm.fabs.f64(double %10)
; Compare each fabs result with 0.0 and OR the results together. The chain
; consumes them in the order %5, %7, %11, %9, which differs from the order
; in which the fabs values were defined above.
%12 = fcmp oeq double %5, 0.000000e+00
%13 = fcmp oeq double %7, 0.000000e+00
%14 = or i1 %12, %13
%15 = fcmp oeq double %11, 0.000000e+00
%16 = or i1 %14, %15
%17 = fcmp oeq double %9, 0.000000e+00
%18 = or i1 %16, %17
; Both select arms are 0.0; the selected value is stored through a null pointer.
%19 = select i1 %18, double 0.000000e+00, double 0.000000e+00
store double %19, ptr null, align 8
ret void
}

declare double @llvm.fabs.f64(double)

0 comments on commit 0cc1505

Please sign in to comment.