Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SLP]Fix PR59230: Use actual vector factor when sorting entries.
When we sort entries while attempting to reorder scalars, we need to use the actual vectorization factor, not the number of scalars. Otherwise, the compiler crashes if the scalars have to be reordered. Differential Revision: https://reviews.llvm.org/D138819
- Loading branch information
1 parent
1fd4d91
commit 0cc1505
Showing
2 changed files
with
46 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
43 changes: 43 additions & 0 deletions
43
llvm/test/Transforms/SLPVectorizer/X86/reorder-vf-to-resize.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
; RUN: opt -passes=slp-vectorizer -mcpu=skx -mtriple=x86_64-unknown-linux-gnu -S < %s | FileCheck %s | ||
|
||
; Regression test for PR59230. Two adjacent doubles are loaded, combined with | ||
; 0.0 via fadd/fsub, and each result is used twice in an fmul -> fabs -> fcmp | ||
; chain whose i1 results are OR-ed together. The CHECK lines expect the four | ||
; scalar chains to become <4 x double> operations fed by a shufflevector of the | ||
; two <2 x double> fsub/fadd results. | ||
define void @main(ptr %0) { | ||
; CHECK-LABEL: @main( | ||
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[TMP0:%.*]], align 8 | ||
; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> zeroinitializer, [[TMP2]] | ||
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> zeroinitializer, [[TMP2]] | ||
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP4]], <4 x i32> <i32 1, i32 2, i32 1, i32 2> | ||
; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], zeroinitializer | ||
; CHECK-NEXT: [[TMP7:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[TMP6]]) | ||
; CHECK-NEXT: [[TMP8:%.*]] = fcmp oeq <4 x double> [[TMP7]], zeroinitializer | ||
; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]]) | ||
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double 0.000000e+00, double 0.000000e+00 | ||
; CHECK-NEXT: store double [[TMP10]], ptr null, align 8 | ||
; CHECK-NEXT: ret void | ||
; | ||
; Load both fields of the { double, double } pair addressed by %0. | ||
%.unpack = load double, ptr %0, align 8 | ||
%.elt1 = getelementptr { double, double }, ptr %0, i64 0, i32 1 | ||
%.unpack2 = load double, ptr %.elt1, align 8 | ||
; %2 is derived from the first field (fadd), %3 from the second (fsub). | ||
%2 = fadd double %.unpack, 0.000000e+00 | ||
%3 = fsub double 0.000000e+00, %.unpack2 | ||
; Four fmul/fabs chains whose inputs appear in the order %2, %3, %3, %2, | ||
; so each of the two values above is used twice. | ||
%4 = fmul double %2, 0.000000e+00 | ||
%5 = call double @llvm.fabs.f64(double %4) | ||
%6 = fmul double %3, 0.000000e+00 | ||
%7 = call double @llvm.fabs.f64(double %6) | ||
%8 = fmul double %3, 0.000000e+00 | ||
%9 = call double @llvm.fabs.f64(double %8) | ||
%10 = fmul double %2, 0.000000e+00 | ||
%11 = call double @llvm.fabs.f64(double %10) | ||
; OR-chain over the four comparisons. The fabs results are consumed in the | ||
; order %5, %7, %11, %9, which differs from the order they were defined in. | ||
%12 = fcmp oeq double %5, 0.000000e+00 | ||
%13 = fcmp oeq double %7, 0.000000e+00 | ||
%14 = or i1 %12, %13 | ||
%15 = fcmp oeq double %11, 0.000000e+00 | ||
%16 = or i1 %14, %15 | ||
%17 = fcmp oeq double %9, 0.000000e+00 | ||
%18 = or i1 %16, %17 | ||
; Both select arms are 0.0; the selected value is stored through a null pointer. | ||
%19 = select i1 %18, double 0.000000e+00, double 0.000000e+00 | ||
store double %19, ptr null, align 8 | ||
ret void | ||
} | ||
|
||
declare double @llvm.fabs.f64(double) |