Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SLP][NFC] Add test case exposing deficiency in finding reductions that feed buildvector sequence.
Differential Revision: https://reviews.llvm.org/D132506
- Loading branch information
1 parent
002bfdd
commit e3dd0dd
Showing
1 changed file
with
194 additions
and
0 deletions.
There are no files selected for viewing
194 changes: 194 additions & 0 deletions
194
llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,194 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
; RUN: opt < %s -mtriple=x86_64 -slp-vectorizer -S -mcpu=skylake-avx512 | FileCheck %s | ||
|
||
; This test represents a case with multiple vectorization possibilities, | ||
; where the most effective way to vectorize is to match both 8-way reductions | ||
; that feed the insertelement build-vector sequence. | ||
|
||
; Masked scatter intrinsic taking a <2 x double> value and a vector of two | ||
; double pointers. Per the LLVM LangRef, the i32 immarg operand is the | ||
; alignment and the <2 x i1> operand is the per-lane store mask. | ||
declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double>, <2 x double*>, i32 immarg, <2 x i1>) | ||
|
||
; @test computes two 8-term fast-math sums of products that share one factor | ||
; stream loaded from %arg at the odd indices 1, 3, ..., 15: | ||
;   rdx1 = sum over k = 0..7 of arg1[k]      * arg[2k+1] | ||
;   rdx2 = sum over k = 0..7 of arg1[16 + k] * arg[2k+1] | ||
; Both scalars are packed into a <2 x double> with two insertelement | ||
; instructions and stored through a masked scatter (all lanes enabled) to | ||
; arg2[0] and arg2[16]. | ||
; | ||
; The CHECK lines below record the output as <2 x double> fmul/fadd chains fed | ||
; by scalar loads and insertelements, one lane per sum; no 8-wide vector | ||
; reduction appears in the expected output. | ||
define void @test(double* nocapture readonly %arg, double* nocapture readonly %arg1, double* nocapture %arg2) { | ||
; CHECK-LABEL: @test( | ||
; CHECK-NEXT: entry: | ||
; CHECK-NEXT: [[GEP1_0:%.*]] = getelementptr inbounds double, double* [[ARG:%.*]], i64 1 | ||
; CHECK-NEXT: [[LD1_0:%.*]] = load double, double* [[GEP1_0]], align 8 | ||
; CHECK-NEXT: [[GEP2_0:%.*]] = getelementptr inbounds double, double* [[ARG1:%.*]], i64 16 | ||
; CHECK-NEXT: [[GEP1_1:%.*]] = getelementptr inbounds double, double* [[ARG]], i64 3 | ||
; CHECK-NEXT: [[LD1_1:%.*]] = load double, double* [[GEP1_1]], align 8 | ||
; CHECK-NEXT: [[GEP0_1:%.*]] = getelementptr inbounds double, double* [[ARG1]], i64 1 | ||
; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr inbounds double, double* [[ARG1]], i64 17 | ||
; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr inbounds double, double* [[ARG]], i64 5 | ||
; CHECK-NEXT: [[LD1_2:%.*]] = load double, double* [[GEP1_2]], align 8 | ||
; CHECK-NEXT: [[GEP0_2:%.*]] = getelementptr inbounds double, double* [[ARG1]], i64 2 | ||
; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr inbounds double, double* [[ARG1]], i64 18 | ||
; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr inbounds double, double* [[ARG]], i64 7 | ||
; CHECK-NEXT: [[LD1_3:%.*]] = load double, double* [[GEP1_3]], align 8 | ||
; CHECK-NEXT: [[GEP0_3:%.*]] = getelementptr inbounds double, double* [[ARG1]], i64 3 | ||
; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr inbounds double, double* [[ARG1]], i64 19 | ||
; CHECK-NEXT: [[GEP1_4:%.*]] = getelementptr inbounds double, double* [[ARG]], i64 9 | ||
; CHECK-NEXT: [[LD1_4:%.*]] = load double, double* [[GEP1_4]], align 8 | ||
; CHECK-NEXT: [[GEP0_4:%.*]] = getelementptr inbounds double, double* [[ARG1]], i64 4 | ||
; CHECK-NEXT: [[GEP2_4:%.*]] = getelementptr inbounds double, double* [[ARG1]], i64 20 | ||
; CHECK-NEXT: [[GEP1_5:%.*]] = getelementptr inbounds double, double* [[ARG]], i64 11 | ||
; CHECK-NEXT: [[LD1_5:%.*]] = load double, double* [[GEP1_5]], align 8 | ||
; CHECK-NEXT: [[GEP0_5:%.*]] = getelementptr inbounds double, double* [[ARG1]], i64 5 | ||
; CHECK-NEXT: [[GEP2_5:%.*]] = getelementptr inbounds double, double* [[ARG1]], i64 21 | ||
; CHECK-NEXT: [[GEP1_6:%.*]] = getelementptr inbounds double, double* [[ARG]], i64 13 | ||
; CHECK-NEXT: [[LD1_6:%.*]] = load double, double* [[GEP1_6]], align 8 | ||
; CHECK-NEXT: [[GEP0_6:%.*]] = getelementptr inbounds double, double* [[ARG1]], i64 6 | ||
; CHECK-NEXT: [[GEP2_6:%.*]] = getelementptr inbounds double, double* [[ARG1]], i64 22 | ||
; CHECK-NEXT: [[GEP1_7:%.*]] = getelementptr inbounds double, double* [[ARG]], i64 15 | ||
; CHECK-NEXT: [[LD1_7:%.*]] = load double, double* [[GEP1_7]], align 8 | ||
; CHECK-NEXT: [[GEP0_7:%.*]] = getelementptr inbounds double, double* [[ARG1]], i64 7 | ||
; CHECK-NEXT: [[GEP2_7:%.*]] = getelementptr inbounds double, double* [[ARG1]], i64 23 | ||
; CHECK-NEXT: [[LD0_0:%.*]] = load double, double* [[ARG1]], align 8 | ||
; CHECK-NEXT: [[LD2_0:%.*]] = load double, double* [[GEP2_0]], align 8 | ||
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD0_0]], i32 0 | ||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[LD2_0]], i32 1 | ||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[LD1_0]], i32 0 | ||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[LD1_0]], i32 1 | ||
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP3]] | ||
; CHECK-NEXT: [[LD0_1:%.*]] = load double, double* [[GEP0_1]], align 8 | ||
; CHECK-NEXT: [[LD2_1:%.*]] = load double, double* [[GEP2_1]], align 8 | ||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[LD0_1]], i32 0 | ||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[LD2_1]], i32 1 | ||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[LD1_1]], i32 0 | ||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[LD1_1]], i32 1 | ||
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <2 x double> [[TMP6]], [[TMP8]] | ||
; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP9]] | ||
; CHECK-NEXT: [[LD0_2:%.*]] = load double, double* [[GEP0_2]], align 8 | ||
; CHECK-NEXT: [[LD2_2:%.*]] = load double, double* [[GEP2_2]], align 8 | ||
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[LD0_2]], i32 0 | ||
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[LD2_2]], i32 1 | ||
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[LD1_2]], i32 0 | ||
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double [[LD1_2]], i32 1 | ||
; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[TMP12]], [[TMP14]] | ||
; CHECK-NEXT: [[TMP16:%.*]] = fadd fast <2 x double> [[TMP10]], [[TMP15]] | ||
; CHECK-NEXT: [[LD0_3:%.*]] = load double, double* [[GEP0_3]], align 8 | ||
; CHECK-NEXT: [[LD2_3:%.*]] = load double, double* [[GEP2_3]], align 8 | ||
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x double> poison, double [[LD0_3]], i32 0 | ||
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x double> [[TMP17]], double [[LD2_3]], i32 1 | ||
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> poison, double [[LD1_3]], i32 0 | ||
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x double> [[TMP19]], double [[LD1_3]], i32 1 | ||
; CHECK-NEXT: [[TMP21:%.*]] = fmul fast <2 x double> [[TMP18]], [[TMP20]] | ||
; CHECK-NEXT: [[TMP22:%.*]] = fadd fast <2 x double> [[TMP16]], [[TMP21]] | ||
; CHECK-NEXT: [[LD0_4:%.*]] = load double, double* [[GEP0_4]], align 8 | ||
; CHECK-NEXT: [[LD2_4:%.*]] = load double, double* [[GEP2_4]], align 8 | ||
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x double> poison, double [[LD0_4]], i32 0 | ||
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x double> [[TMP23]], double [[LD2_4]], i32 1 | ||
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x double> poison, double [[LD1_4]], i32 0 | ||
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x double> [[TMP25]], double [[LD1_4]], i32 1 | ||
; CHECK-NEXT: [[TMP27:%.*]] = fmul fast <2 x double> [[TMP24]], [[TMP26]] | ||
; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <2 x double> [[TMP22]], [[TMP27]] | ||
; CHECK-NEXT: [[LD0_5:%.*]] = load double, double* [[GEP0_5]], align 8 | ||
; CHECK-NEXT: [[LD2_5:%.*]] = load double, double* [[GEP2_5]], align 8 | ||
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x double> poison, double [[LD0_5]], i32 0 | ||
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x double> [[TMP29]], double [[LD2_5]], i32 1 | ||
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x double> poison, double [[LD1_5]], i32 0 | ||
; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x double> [[TMP31]], double [[LD1_5]], i32 1 | ||
; CHECK-NEXT: [[TMP33:%.*]] = fmul fast <2 x double> [[TMP30]], [[TMP32]] | ||
; CHECK-NEXT: [[TMP34:%.*]] = fadd fast <2 x double> [[TMP28]], [[TMP33]] | ||
; CHECK-NEXT: [[LD0_6:%.*]] = load double, double* [[GEP0_6]], align 8 | ||
; CHECK-NEXT: [[LD2_6:%.*]] = load double, double* [[GEP2_6]], align 8 | ||
; CHECK-NEXT: [[TMP35:%.*]] = insertelement <2 x double> poison, double [[LD0_6]], i32 0 | ||
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x double> [[TMP35]], double [[LD2_6]], i32 1 | ||
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <2 x double> poison, double [[LD1_6]], i32 0 | ||
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <2 x double> [[TMP37]], double [[LD1_6]], i32 1 | ||
; CHECK-NEXT: [[TMP39:%.*]] = fmul fast <2 x double> [[TMP36]], [[TMP38]] | ||
; CHECK-NEXT: [[TMP40:%.*]] = fadd fast <2 x double> [[TMP34]], [[TMP39]] | ||
; CHECK-NEXT: [[LD0_7:%.*]] = load double, double* [[GEP0_7]], align 8 | ||
; CHECK-NEXT: [[LD2_7:%.*]] = load double, double* [[GEP2_7]], align 8 | ||
; CHECK-NEXT: [[TMP41:%.*]] = insertelement <2 x double> poison, double [[LD0_7]], i32 0 | ||
; CHECK-NEXT: [[TMP42:%.*]] = insertelement <2 x double> [[TMP41]], double [[LD2_7]], i32 1 | ||
; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x double> poison, double [[LD1_7]], i32 0 | ||
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <2 x double> [[TMP43]], double [[LD1_7]], i32 1 | ||
; CHECK-NEXT: [[TMP45:%.*]] = fmul fast <2 x double> [[TMP42]], [[TMP44]] | ||
; CHECK-NEXT: [[TMP46:%.*]] = fadd fast <2 x double> [[TMP40]], [[TMP45]] | ||
; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds double, double* [[ARG2:%.*]], <2 x i64> <i64 0, i64 16> | ||
; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> [[TMP46]], <2 x double*> [[P]], i32 8, <2 x i1> <i1 true, i1 true>) | ||
; CHECK-NEXT: ret void | ||
; | ||
entry: | ||
; Step 0: the shared factor arg[1] multiplies arg1[0] (first sum) and | ||
; arg1[16] (second sum); these two products seed the accumulators. | ||
%gep1.0 = getelementptr inbounds double, double* %arg, i64 1 | ||
%ld1.0 = load double, double* %gep1.0, align 8 | ||
%ld0.0 = load double, double* %arg1, align 8 | ||
%mul1.0 = fmul fast double %ld0.0, %ld1.0 | ||
%gep2.0 = getelementptr inbounds double, double* %arg1, i64 16 | ||
%ld2.0 = load double, double* %gep2.0, align 8 | ||
%mul2.0 = fmul fast double %ld2.0, %ld1.0 | ||
; Steps 1..7: the same pattern with arg[2k+1], arg1[k] and arg1[16+k]; each | ||
; product is added onto the running %rdx1.* / %rdx2.* chain in order. | ||
%gep1.1 = getelementptr inbounds double, double* %arg, i64 3 | ||
%ld1.1 = load double, double* %gep1.1, align 8 | ||
%gep0.1 = getelementptr inbounds double, double* %arg1, i64 1 | ||
%ld0.1 = load double, double* %gep0.1, align 8 | ||
%mul1.1 = fmul fast double %ld0.1, %ld1.1 | ||
%rdx1.0 = fadd fast double %mul1.0, %mul1.1 | ||
%gep2.1 = getelementptr inbounds double, double* %arg1, i64 17 | ||
%ld2.1 = load double, double* %gep2.1, align 8 | ||
%mul2.1 = fmul fast double %ld2.1, %ld1.1 | ||
%rdx2.0 = fadd fast double %mul2.0, %mul2.1 | ||
%gep1.2 = getelementptr inbounds double, double* %arg, i64 5 | ||
%ld1.2 = load double, double* %gep1.2, align 8 | ||
%gep0.2 = getelementptr inbounds double, double* %arg1, i64 2 | ||
%ld0.2 = load double, double* %gep0.2, align 8 | ||
%mul1.2 = fmul fast double %ld0.2, %ld1.2 | ||
%rdx1.1 = fadd fast double %rdx1.0, %mul1.2 | ||
%gep2.2 = getelementptr inbounds double, double* %arg1, i64 18 | ||
%ld2.2 = load double, double* %gep2.2, align 8 | ||
%mul2.2 = fmul fast double %ld2.2, %ld1.2 | ||
%rdx2.1 = fadd fast double %rdx2.0, %mul2.2 | ||
%gep1.3 = getelementptr inbounds double, double* %arg, i64 7 | ||
%ld1.3 = load double, double* %gep1.3, align 8 | ||
%gep0.3 = getelementptr inbounds double, double* %arg1, i64 3 | ||
%ld0.3 = load double, double* %gep0.3, align 8 | ||
%mul1.3 = fmul fast double %ld0.3, %ld1.3 | ||
%rdx1.2 = fadd fast double %rdx1.1, %mul1.3 | ||
%gep2.3 = getelementptr inbounds double, double* %arg1, i64 19 | ||
%ld2.3 = load double, double* %gep2.3, align 8 | ||
%mul2.3 = fmul fast double %ld2.3, %ld1.3 | ||
%rdx2.2 = fadd fast double %rdx2.1, %mul2.3 | ||
%gep1.4 = getelementptr inbounds double, double* %arg, i64 9 | ||
%ld1.4 = load double, double* %gep1.4, align 8 | ||
%gep0.4 = getelementptr inbounds double, double* %arg1, i64 4 | ||
%ld0.4 = load double, double* %gep0.4, align 8 | ||
%mul1.4 = fmul fast double %ld0.4, %ld1.4 | ||
%rdx1.3 = fadd fast double %rdx1.2, %mul1.4 | ||
%gep2.4 = getelementptr inbounds double, double* %arg1, i64 20 | ||
%ld2.4 = load double, double* %gep2.4, align 8 | ||
%mul2.4 = fmul fast double %ld2.4, %ld1.4 | ||
%rdx2.3 = fadd fast double %rdx2.2, %mul2.4 | ||
%gep1.5 = getelementptr inbounds double, double* %arg, i64 11 | ||
%ld1.5 = load double, double* %gep1.5, align 8 | ||
%gep0.5 = getelementptr inbounds double, double* %arg1, i64 5 | ||
%ld0.5 = load double, double* %gep0.5, align 8 | ||
%mul1.5 = fmul fast double %ld0.5, %ld1.5 | ||
%rdx1.4 = fadd fast double %rdx1.3, %mul1.5 | ||
%gep2.5 = getelementptr inbounds double, double* %arg1, i64 21 | ||
%ld2.5 = load double, double* %gep2.5, align 8 | ||
%mul2.5 = fmul fast double %ld2.5, %ld1.5 | ||
%rdx2.4 = fadd fast double %rdx2.3, %mul2.5 | ||
%gep1.6 = getelementptr inbounds double, double* %arg, i64 13 | ||
%ld1.6 = load double, double* %gep1.6, align 8 | ||
%gep0.6 = getelementptr inbounds double, double* %arg1, i64 6 | ||
%ld0.6 = load double, double* %gep0.6, align 8 | ||
%mul1.6 = fmul fast double %ld0.6, %ld1.6 | ||
%rdx1.5 = fadd fast double %rdx1.4, %mul1.6 | ||
%gep2.6 = getelementptr inbounds double, double* %arg1, i64 22 | ||
%ld2.6 = load double, double* %gep2.6, align 8 | ||
%mul2.6 = fmul fast double %ld2.6, %ld1.6 | ||
%rdx2.5 = fadd fast double %rdx2.4, %mul2.6 | ||
%gep1.7 = getelementptr inbounds double, double* %arg, i64 15 | ||
%ld1.7 = load double, double* %gep1.7, align 8 | ||
%gep0.7 = getelementptr inbounds double, double* %arg1, i64 7 | ||
%ld0.7 = load double, double* %gep0.7, align 8 | ||
%mul1.7 = fmul fast double %ld0.7, %ld1.7 | ||
%rdx1 = fadd fast double %rdx1.5, %mul1.7 | ||
%gep2.7 = getelementptr inbounds double, double* %arg1, i64 23 | ||
%ld2.7 = load double, double* %gep2.7, align 8 | ||
%mul2.7 = fmul fast double %ld2.7, %ld1.7 | ||
%rdx2 = fadd fast double %rdx2.5, %mul2.7 | ||
; Build-vector sequence: pack the two final sums into lanes 0 and 1, then | ||
; scatter them to %arg2 at element offsets 0 and 16 with both lanes enabled. | ||
%i142 = insertelement <2 x double> poison, double %rdx1, i64 0 | ||
%i143 = insertelement <2 x double> %i142, double %rdx2, i64 1 | ||
%p = getelementptr inbounds double, double* %arg2, <2 x i64> <i64 0, i64 16> | ||
call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %i143, <2 x double*> %p, i32 8, <2 x i1> <i1 true, i1 true>) | ||
ret void | ||
} |