Skip to content

Commit

Permalink
[SLP]Fix crash on reordering of ScatterVectorize nodes.
Browse files Browse the repository at this point in the history
ScatterVectorize nodes should be handled the same way as gathers in the
reorderBottomToTop function, since we can simply reorder the loads in
such a node. Because of that, such nodes need to be included in the list
of gathered nodes to fix the compiler crash.

Differential Revision: https://reviews.llvm.org/D126378
  • Loading branch information
alexey-bataev committed May 26, 2022
1 parent 4c549a0 commit 9139d48
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 0 deletions.
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Expand Up @@ -3810,6 +3810,11 @@ bool BoUpSLP::canReorderOperands(
// Add the node to the list of the ordered nodes with the identity
// order.
Edges.emplace_back(I, TE);
// Add ScatterVectorize nodes to the list of operands, where just
// reordering of the scalars is required. Similar to the gathers, so
// simply add to the list of gathered ops.
if (TE->State != TreeEntry::Vectorize)
GatherOps.push_back(TE);
continue;
}
ArrayRef<Value *> VL = UserTE->getOperand(I);
Expand Down
@@ -0,0 +1,74 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=cascadelake < %s | FileCheck %s

; Regression test for D126378: running the SLP vectorizer over this function
; must complete and produce the vectorized IR asserted below.
; NOTE(review): per the commit message the crash was in operand reordering of
; a ScatterVectorize node; which scalars form that node is not visible from
; the test itself -- confirm against the SLP debug output if it matters.
define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX10_I_I86:%.*]] = getelementptr inbounds float, ptr undef, i64 2
; CHECK-NEXT: [[ARRAYIDX21_I:%.*]] = getelementptr inbounds [4 x float], ptr undef, i64 2
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr undef, align 4
; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> zeroinitializer, [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX10_I_I86]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr undef, align 4
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> <float 0.000000e+00, float poison>, <2 x float> [[TMP0]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP2]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP6]], <2 x float> [[TMP7]])
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: br i1 false, label [[BB2:%.*]], label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x float> [[SHUFFLE]], zeroinitializer
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x float> [ [[TMP9]], [[BB2]] ], [ zeroinitializer, [[BB1]] ]
; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x float> [[TMP1]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x float> [[TMP11]], zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = fsub <2 x float> [[TMP12]], zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x float> [[TMP13]], zeroinitializer
; CHECK-NEXT: store <2 x float> [[TMP14]], ptr [[ARRAYIDX21_I]], align 16
; CHECK-NEXT: ret void
;
; All addresses below are computed from an undef base pointer.
entry:
%arrayidx10.i.i86 = getelementptr inbounds float, ptr undef, i64 2
%arrayidx6.i66.i = getelementptr inbounds float, ptr undef, i64 1
%arrayidx21.i = getelementptr inbounds [4 x float], ptr undef, i64 2
%arrayidx6.i109.i = getelementptr inbounds [4 x float], ptr undef, i64 2, i64 1
br label %bb1

bb1:
; %0 and %1 load the floats at offsets 0 and 1 from the base; the expected
; output merges them into a single <2 x float> load.
%0 = load float, ptr undef, align 4
%sub.i71.i = fsub float 0.000000e+00, %0
%1 = load float, ptr %arrayidx6.i66.i, align 4
%sub5.i74.i = fsub float 0.000000e+00, %1
; %2 and %4 load offsets 2 and 0; in the expected output they remain scalar
; loads that are assembled into vectors with insertelement.
%2 = load float, ptr %arrayidx10.i.i86, align 4
%3 = call float @llvm.fmuladd.f32(float %1, float %2, float 0.000000e+00)
%4 = load float, ptr undef, align 4
%5 = call float @llvm.fmuladd.f32(float 0.000000e+00, float %4, float %2)
; The condition is the constant false, so control always continues to bb3.
br i1 false, label %bb2, label %bb3

bb2:
%mul.i95 = fmul float %3, 0.000000e+00
%mul3.i96 = fmul float %5, 0.000000e+00
br label %bb3

bb3:
; Each phi takes its fmul result from bb2 or 0.0 from bb1. Note the phis are
; listed in the opposite order to the fmuls that feed them.
%vddir.sroa.8.0.i = phi float [ %mul3.i96, %bb2 ], [ 0.000000e+00, %bb1 ]
%vddir.sroa.0.0.i = phi float [ %mul.i95, %bb2 ], [ 0.000000e+00, %bb1 ]
%add.i.i = fadd float %sub.i71.i, %vddir.sroa.0.0.i
%add5.i.i = fadd float %sub5.i74.i, %vddir.sroa.8.0.i
%add.i105.i = fadd float %add.i.i, 0.000000e+00
%add5.i108.i = fadd float %add5.i.i, 0.000000e+00
%sub.i114.i = fsub float %add.i105.i, 0.000000e+00
%sub4.i.i = fsub float %add5.i108.i, 0.000000e+00
%sub.i118.i = fsub float %sub.i114.i, 0.000000e+00
; The two stores write elements 0 and 1 of the same [4 x float] array; the
; expected output replaces them with one <2 x float> store.
store float %sub.i118.i, ptr %arrayidx21.i, align 16
%sub4.i121.i = fsub float %sub4.i.i, 0.000000e+00
store float %sub4.i121.i, ptr %arrayidx6.i109.i, align 4
ret void
}

declare float @llvm.fmuladd.f32(float, float, float)

0 comments on commit 9139d48

Please sign in to comment.