[SLP]Fix crash on reordering of ScatterVectorize nodes.

ScatterVectorize nodes should be handled the same way as gathers in the reorderBottomToTop function, since we can simply reorder the loads in such a node. Because of that, such nodes need to be included in the list of gathered nodes to fix the compiler crash. Differential Revision: https://reviews.llvm.org/D126378
llvm · May 26, 2022 · 9139d48 · 9139d48
1 parent 4c549a0
commit 9139d48
Show file tree

Hide file tree

Showing 2 changed files with 79 additions and 0 deletions.
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3810,6 +3810,11 @@ bool BoUpSLP::canReorderOperands(
       // Add the node to the list of the ordered nodes with the identity
       // order.
       Edges.emplace_back(I, TE);
+      // Add ScatterVectorize nodes to the list of operands, where just
+      // reordering of the scalars is required. Similar to the gathers, so
+      // simply add to the list of gathered ops.
+      if (TE->State != TreeEntry::Vectorize)
+        GatherOps.push_back(TE);
       continue;
     }
     ArrayRef<Value *> VL = UserTE->getOperand(I);

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=cascadelake < %s | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX10_I_I86:%.*]] = getelementptr inbounds float, ptr undef, i64 2
+; CHECK-NEXT:    [[ARRAYIDX21_I:%.*]] = getelementptr inbounds [4 x float], ptr undef, i64 2
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr undef, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub <2 x float> zeroinitializer, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[ARRAYIDX10_I_I86]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr undef, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x float> <float 0.000000e+00, float poison>, <2 x float> [[TMP0]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP6]], <2 x float> [[TMP7]])
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    br i1 false, label [[BB2:%.*]], label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x float> [[SHUFFLE]], zeroinitializer
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[TMP10:%.*]] = phi <2 x float> [ [[TMP9]], [[BB2]] ], [ zeroinitializer, [[BB1]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x float> [[TMP1]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = fadd <2 x float> [[TMP11]], zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = fsub <2 x float> [[TMP12]], zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = fsub <2 x float> [[TMP13]], zeroinitializer
+; CHECK-NEXT:    store <2 x float> [[TMP14]], ptr [[ARRAYIDX21_I]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arrayidx10.i.i86 = getelementptr inbounds float, ptr undef, i64 2
+  %arrayidx6.i66.i = getelementptr inbounds float, ptr undef, i64 1
+  %arrayidx21.i = getelementptr inbounds [4 x float], ptr undef, i64 2
+  %arrayidx6.i109.i = getelementptr inbounds [4 x float], ptr undef, i64 2, i64 1
+  br label %bb1
+
+bb1:
+  %0 = load float, ptr undef, align 4
+  %sub.i71.i = fsub float 0.000000e+00, %0
+  %1 = load float, ptr %arrayidx6.i66.i, align 4
+  %sub5.i74.i = fsub float 0.000000e+00, %1
+  %2 = load float, ptr %arrayidx10.i.i86, align 4
+  %3 = call float @llvm.fmuladd.f32(float %1, float %2, float 0.000000e+00)
+  %4 = load float, ptr undef, align 4
+  %5 = call float @llvm.fmuladd.f32(float 0.000000e+00, float %4, float %2)
+  br i1 false, label %bb2, label %bb3
+
+bb2:
+  %mul.i95 = fmul float %3, 0.000000e+00
+  %mul3.i96 = fmul float %5, 0.000000e+00
+  br label %bb3
+
+bb3:
+  %vddir.sroa.8.0.i = phi float [ %mul3.i96, %bb2 ], [ 0.000000e+00, %bb1 ]
+  %vddir.sroa.0.0.i = phi float [ %mul.i95, %bb2 ], [ 0.000000e+00, %bb1 ]
+  %add.i.i = fadd float %sub.i71.i, %vddir.sroa.0.0.i
+  %add5.i.i = fadd float %sub5.i74.i, %vddir.sroa.8.0.i
+  %add.i105.i = fadd float %add.i.i, 0.000000e+00
+  %add5.i108.i = fadd float %add5.i.i, 0.000000e+00
+  %sub.i114.i = fsub float %add.i105.i, 0.000000e+00
+  %sub4.i.i = fsub float %add5.i108.i, 0.000000e+00
+  %sub.i118.i = fsub float %sub.i114.i, 0.000000e+00
+  store float %sub.i118.i, ptr %arrayidx21.i, align 16
+  %sub4.i121.i = fsub float %sub4.i.i, 0.000000e+00
+  store float %sub4.i121.i, ptr %arrayidx6.i109.i, align 4
+  ret void
+}
+
+declare float @llvm.fmuladd.f32(float, float, float)
+