From 9139d484d46a0b63275e00b988895bfb419bbe71 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 25 May 2022 07:15:49 -0700 Subject: [PATCH] [SLP]Fix crash on reordering of ScatterVectorize nodes. ScatterVectorize nodes should be handled the same way as gathers in the reorderBottomToTop function, since we can simply reorder the loads in this node. Because of that, such nodes need to be included in the list of gathered nodes to fix the compiler crash. Differential Revision: https://reviews.llvm.org/D126378 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 5 ++ .../X86/scatter-vectorize-reorder.ll | 74 +++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 8cedb63eead04..5efb7148cc03d 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3810,6 +3810,11 @@ bool BoUpSLP::canReorderOperands( // Add the node to the list of the ordered nodes with the identity // order. Edges.emplace_back(I, TE); + // Add ScatterVectorize nodes to the list of operands, where just + // reordering of the scalars is required. Similar to the gathers, so + // simply add to the list of gathered ops. 
+ if (TE->State != TreeEntry::Vectorize) + GatherOps.push_back(TE); continue; } ArrayRef VL = UserTE->getOperand(I); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll new file mode 100644 index 0000000000000..e269117102fab --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=cascadelake < %s | FileCheck %s + +define void @test() { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX10_I_I86:%.*]] = getelementptr inbounds float, ptr undef, i64 2 +; CHECK-NEXT: [[ARRAYIDX21_I:%.*]] = getelementptr inbounds [4 x float], ptr undef, i64 2 +; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr undef, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> zeroinitializer, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX10_I_I86]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr undef, align 4 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> , <2 x float> [[TMP0]], <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> , float [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP6]], <2 x float> [[TMP7]]) +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: br i1 false, label [[BB2:%.*]], label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x float> [[SHUFFLE]], zeroinitializer +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[TMP10:%.*]] = phi 
<2 x float> [ [[TMP9]], [[BB2]] ], [ zeroinitializer, [[BB1]] ] +; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x float> [[TMP1]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x float> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = fsub <2 x float> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x float> [[TMP13]], zeroinitializer +; CHECK-NEXT: store <2 x float> [[TMP14]], ptr [[ARRAYIDX21_I]], align 16 +; CHECK-NEXT: ret void +; +entry: + %arrayidx10.i.i86 = getelementptr inbounds float, ptr undef, i64 2 + %arrayidx6.i66.i = getelementptr inbounds float, ptr undef, i64 1 + %arrayidx21.i = getelementptr inbounds [4 x float], ptr undef, i64 2 + %arrayidx6.i109.i = getelementptr inbounds [4 x float], ptr undef, i64 2, i64 1 + br label %bb1 + +bb1: + %0 = load float, ptr undef, align 4 + %sub.i71.i = fsub float 0.000000e+00, %0 + %1 = load float, ptr %arrayidx6.i66.i, align 4 + %sub5.i74.i = fsub float 0.000000e+00, %1 + %2 = load float, ptr %arrayidx10.i.i86, align 4 + %3 = call float @llvm.fmuladd.f32(float %1, float %2, float 0.000000e+00) + %4 = load float, ptr undef, align 4 + %5 = call float @llvm.fmuladd.f32(float 0.000000e+00, float %4, float %2) + br i1 false, label %bb2, label %bb3 + +bb2: + %mul.i95 = fmul float %3, 0.000000e+00 + %mul3.i96 = fmul float %5, 0.000000e+00 + br label %bb3 + +bb3: + %vddir.sroa.8.0.i = phi float [ %mul3.i96, %bb2 ], [ 0.000000e+00, %bb1 ] + %vddir.sroa.0.0.i = phi float [ %mul.i95, %bb2 ], [ 0.000000e+00, %bb1 ] + %add.i.i = fadd float %sub.i71.i, %vddir.sroa.0.0.i + %add5.i.i = fadd float %sub5.i74.i, %vddir.sroa.8.0.i + %add.i105.i = fadd float %add.i.i, 0.000000e+00 + %add5.i108.i = fadd float %add5.i.i, 0.000000e+00 + %sub.i114.i = fsub float %add.i105.i, 0.000000e+00 + %sub4.i.i = fsub float %add5.i108.i, 0.000000e+00 + %sub.i118.i = fsub float %sub.i114.i, 0.000000e+00 + store float %sub.i118.i, ptr %arrayidx21.i, align 16 + %sub4.i121.i = fsub float %sub4.i.i, 0.000000e+00 + store float 
%sub4.i121.i, ptr %arrayidx6.i109.i, align 4 + ret void +} + +declare float @llvm.fmuladd.f32(float, float, float) +