Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SLP]Fix crash on reordering of ScatterVectorize nodes.
ScatterVectorize nodes should be handled the same way as gather nodes in the reorderBottomToTop function, since we can simply reorder the loads in such a node. Because of that, such nodes need to be included in the list of gathered nodes to fix the compiler crash. Differential Revision: https://reviews.llvm.org/D126378
- Loading branch information
1 parent
4c549a0
commit 9139d48
Showing
2 changed files
with
79 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
74 changes: 74 additions & 0 deletions
74
llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
; RUN: opt -S -slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=cascadelake < %s | FileCheck %s | ||
|
||
; Regression test for an SLP vectorizer crash on reordering of ScatterVectorize nodes (D126378). | ||
; The autogenerated CHECK lines pin the vectorized form of the scalar body that follows them. | ||
define void @test() { | ||
; CHECK-LABEL: @test( | ||
; CHECK-NEXT: entry: | ||
; CHECK-NEXT: [[ARRAYIDX10_I_I86:%.*]] = getelementptr inbounds float, ptr undef, i64 2 | ||
; CHECK-NEXT: [[ARRAYIDX21_I:%.*]] = getelementptr inbounds [4 x float], ptr undef, i64 2 | ||
; CHECK-NEXT: br label [[BB1:%.*]] | ||
; CHECK: bb1: | ||
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr undef, align 4 | ||
; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> zeroinitializer, [[TMP0]] | ||
; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX10_I_I86]], align 4 | ||
; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr undef, align 4 | ||
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> <float 0.000000e+00, float poison>, <2 x float> [[TMP0]], <2 x i32> <i32 0, i32 3> | ||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i32 0 | ||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP2]], i32 1 | ||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP2]], i32 0 | ||
; CHECK-NEXT: [[TMP8:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP6]], <2 x float> [[TMP7]]) | ||
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <2 x i32> <i32 1, i32 0> | ||
; CHECK-NEXT: br i1 false, label [[BB2:%.*]], label [[BB3:%.*]] | ||
; CHECK: bb2: | ||
; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x float> [[SHUFFLE]], zeroinitializer | ||
; CHECK-NEXT: br label [[BB3]] | ||
; CHECK: bb3: | ||
; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x float> [ [[TMP9]], [[BB2]] ], [ zeroinitializer, [[BB1]] ] | ||
; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x float> [[TMP1]], [[TMP10]] | ||
; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x float> [[TMP11]], zeroinitializer | ||
; CHECK-NEXT: [[TMP13:%.*]] = fsub <2 x float> [[TMP12]], zeroinitializer | ||
; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x float> [[TMP13]], zeroinitializer | ||
; CHECK-NEXT: store <2 x float> [[TMP14]], ptr [[ARRAYIDX21_I]], align 16 | ||
; CHECK-NEXT: ret void | ||
; | ||
entry: | ||
; Addresses used below: float elements 2 and 1 off the undef base are load sources in bb1; | ||
; row 2 of a [4 x float] array (its start and its element 1) receives the two stores in bb3. | ||
%arrayidx10.i.i86 = getelementptr inbounds float, ptr undef, i64 2 | ||
%arrayidx6.i66.i = getelementptr inbounds float, ptr undef, i64 1 | ||
%arrayidx21.i = getelementptr inbounds [4 x float], ptr undef, i64 2 | ||
%arrayidx6.i109.i = getelementptr inbounds [4 x float], ptr undef, i64 2, i64 1 | ||
br label %bb1 | ||
|
||
bb1: | ||
; %0 and %4 both load from the undef base; %1 loads element 1 and %2 loads element 2. | ||
; The two fmuladd calls share %2 but use it in different operand positions (second vs. third). | ||
%0 = load float, ptr undef, align 4 | ||
%sub.i71.i = fsub float 0.000000e+00, %0 | ||
%1 = load float, ptr %arrayidx6.i66.i, align 4 | ||
%sub5.i74.i = fsub float 0.000000e+00, %1 | ||
%2 = load float, ptr %arrayidx10.i.i86, align 4 | ||
%3 = call float @llvm.fmuladd.f32(float %1, float %2, float 0.000000e+00) | ||
%4 = load float, ptr undef, align 4 | ||
%5 = call float @llvm.fmuladd.f32(float 0.000000e+00, float %4, float %2) | ||
br i1 false, label %bb2, label %bb3 | ||
|
||
bb2: | ||
; Statically not taken (the branch condition in bb1 is the constant false); it scales both fmuladd results. | ||
%mul.i95 = fmul float %3, 0.000000e+00 | ||
%mul3.i96 = fmul float %5, 0.000000e+00 | ||
br label %bb3 | ||
|
||
bb3: | ||
; The phi feeding the second store's chain (%mul3.i96) is listed before the one feeding the first store's chain (%mul.i95). | ||
; Each value then goes through its own fadd/fsub chain and is stored to adjacent elements of row 2. | ||
%vddir.sroa.8.0.i = phi float [ %mul3.i96, %bb2 ], [ 0.000000e+00, %bb1 ] | ||
%vddir.sroa.0.0.i = phi float [ %mul.i95, %bb2 ], [ 0.000000e+00, %bb1 ] | ||
%add.i.i = fadd float %sub.i71.i, %vddir.sroa.0.0.i | ||
%add5.i.i = fadd float %sub5.i74.i, %vddir.sroa.8.0.i | ||
%add.i105.i = fadd float %add.i.i, 0.000000e+00 | ||
%add5.i108.i = fadd float %add5.i.i, 0.000000e+00 | ||
%sub.i114.i = fsub float %add.i105.i, 0.000000e+00 | ||
%sub4.i.i = fsub float %add5.i108.i, 0.000000e+00 | ||
%sub.i118.i = fsub float %sub.i114.i, 0.000000e+00 | ||
store float %sub.i118.i, ptr %arrayidx21.i, align 16 | ||
%sub4.i121.i = fsub float %sub4.i.i, 0.000000e+00 | ||
store float %sub4.i121.i, ptr %arrayidx6.i109.i, align 4 | ||
ret void | ||
} | ||
|
||
declare float @llvm.fmuladd.f32(float, float, float) | ||
|