Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[NFC][X86] Duplicate LV test into a costmodel test
Copied from llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll. As discussed in D111460 / D112877 / D112873, we have basically no test coverage for this part of the cost model.
- Loading branch information
Showing
1 changed file
with
204 additions
and
0 deletions.
There are no files selected for viewing
204 changes: 204 additions & 0 deletions
204
llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,204 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
; The "LV: Found an estimated cost ..." lines checked below are printed only via | ||
; --debug-only=loop-vectorize, which opt supports only in assertion-enabled builds; | ||
; the asserts requirement below keeps this test from failing on release builds. | ||
; REQUIRES: asserts | ||
; RUN: opt -mcpu=skx -S -loop-vectorize -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s -check-prefix=DISABLED_MASKED_STRIDED | ||
; RUN: opt -mcpu=skx -S -loop-vectorize -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s -check-prefix=ENABLED_MASKED_STRIDED | ||
|
||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
; (1) Interleave-group with factor 4, storing only 2 members out of the 4. | ||
; Check that when we allow masked-memops to support interleave-group with gaps, | ||
; the store is vectorized using a wide masked store, with a 1,1,0,0,1,1,0,0,... mask. | ||
; Check that when we don't allow masked-memops to support interleave-group with gaps, | ||
; the store is scalarized. | ||
; The input IR was generated from this source: | ||
; for(i=0;i<1024;i++){ | ||
; points[i*4] = x[i]; | ||
; points[i*4 + 1] = y[i]; | ||
; } | ||
; (relates to the testcase in PR50566) | ||
|
||
; DISABLED_MASKED_STRIDED: LV: Checking a loop in "test1" | ||
; | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
; | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
; | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 54 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 54 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
; | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 110 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 110 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
; | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 228 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 228 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
|
||
; ENABLED_MASKED_STRIDED: LV: Checking a loop in "test1" | ||
; | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
; | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 9 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
; | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 20 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
; | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 40 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
; | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 96 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
|
||
; @test1: for i in [0, 1024): points[4*i] = x[i]; points[4*i + 1] = y[i]. | ||
; Elements 4*i + 2 and 4*i + 3 of %points are not written by this loop, so the | ||
; two stores cover only 2 of every 4 consecutive i16 elements. | ||
define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) { | ||
entry: | ||
br label %for.body | ||
|
||
||
for.body: | ||
; %indvars.iv is the loop counter i; it starts at 0 and is incremented by 1 below. | ||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] | ||
%arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv | ||
%0 = load i16, i16* %arrayidx, align 2 | ||
; %1 = i * 4 (shift left by 2). | ||
%1 = shl nuw nsw i64 %indvars.iv, 2 | ||
%arrayidx2 = getelementptr inbounds i16, i16* %points, i64 %1 | ||
; First store of the pair: points[4*i] = x[i]. | ||
store i16 %0, i16* %arrayidx2, align 2 | ||
%arrayidx4 = getelementptr inbounds i16, i16* %y, i64 %indvars.iv | ||
%2 = load i16, i16* %arrayidx4, align 2 | ||
; The two low bits of %1 are zero after the shift, so the 'or' yields 4*i + 1. | ||
%3 = or i64 %1, 1 | ||
%arrayidx7 = getelementptr inbounds i16, i16* %points, i64 %3 | ||
; Second store of the pair: points[4*i + 1] = y[i]. | ||
store i16 %2, i16* %arrayidx7, align 2 | ||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||
; Constant trip count: leave the loop once the incremented counter reaches 1024. | ||
%exitcond.not = icmp eq i64 %indvars.iv.next, 1024 | ||
br i1 %exitcond.not, label %for.end, label %for.body | ||
|
||
||
for.end: | ||
ret void | ||
} | ||
|
||
; (2) Same as above, but this time the gaps mask of the store is also And-ed with the | ||
; fold-tail mask. If using masked memops to vectorize interleaved-group with gaps is | ||
; not allowed, the store is scalarized and predicated. | ||
; The input IR was generated from this source: | ||
; for(i=0;i<numPoints;i++){ | ||
; points[i*4] = x[i]; | ||
; points[i*4 + 1] = y[i]; | ||
; } | ||
|
||
; DISABLED_MASKED_STRIDED: LV: Checking a loop in "test2" | ||
; | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
; | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 15 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
; | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 31 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
; | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 63 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
; | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 130 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
|
||
; ENABLED_MASKED_STRIDED: LV: Checking a loop in "test2" | ||
; | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
; | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 20 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
; | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 41 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
; | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 83 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
; | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 181 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 | ||
|
||
; @test2: for i in [0, numPoints): points[4*i] = x[i]; points[4*i + 1] = y[i]. | ||
; Same store pattern as @test1, but the trip count is the runtime value | ||
; %numPoints instead of a constant. | ||
define void @test2(i16* noalias nocapture %points, i32 %numPoints, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) { | ||
entry: | ||
; Skip the loop entirely unless numPoints > 0 (signed compare). | ||
%cmp15 = icmp sgt i32 %numPoints, 0 | ||
br i1 %cmp15, label %for.body.preheader, label %for.end | ||
|
||
||
for.body.preheader: | ||
; Zero-extend the i32 trip count to match the i64 loop counter. | ||
%wide.trip.count = zext i32 %numPoints to i64 | ||
br label %for.body | ||
|
||
||
for.body: | ||
; %indvars.iv is the loop counter i; it starts at 0 and is incremented by 1 below. | ||
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] | ||
%arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv | ||
%0 = load i16, i16* %arrayidx, align 2 | ||
; %1 = i * 4 (shift left by 2). | ||
%1 = shl nsw i64 %indvars.iv, 2 | ||
%arrayidx2 = getelementptr inbounds i16, i16* %points, i64 %1 | ||
; First store of the pair: points[4*i] = x[i]. | ||
store i16 %0, i16* %arrayidx2, align 2 | ||
%arrayidx4 = getelementptr inbounds i16, i16* %y, i64 %indvars.iv | ||
%2 = load i16, i16* %arrayidx4, align 2 | ||
; The two low bits of %1 are zero after the shift, so the 'or' yields 4*i + 1. | ||
%3 = or i64 %1, 1 | ||
%arrayidx7 = getelementptr inbounds i16, i16* %points, i64 %3 | ||
; Second store of the pair: points[4*i + 1] = y[i]. | ||
store i16 %2, i16* %arrayidx7, align 2 | ||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||
; Leave the loop once the incremented counter equals the widened trip count. | ||
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count | ||
br i1 %exitcond.not, label %for.end.loopexit, label %for.body | ||
|
||
||
for.end.loopexit: | ||
br label %for.end | ||
|
||
||
for.end: | ||
ret void | ||
} | ||
|
||
; (3) Testing a scenario of a conditional store. The gaps mask of the store is also | ||
; And-ed with the condition mask (x[i] > 0). | ||
; If using masked memops to vectorize interleaved-group with gaps is | ||
; not allowed, the store is scalarized and predicated. | ||
; Here the interleave-group has factor 3, storing only 1 member out of the 3. | ||
; The input IR was generated from this source: | ||
; for(i=0;i<1024;i++){ | ||
; if (x[i] > 0) | ||
; points[i*3] = x[i]; | ||
; } | ||
|
||
; DISABLED_MASKED_STRIDED: LV: Checking a loop in "test" | ||
; | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx6, align 2 | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, i16* %arrayidx6, align 2 | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, i16* %arrayidx6, align 2 | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, i16* %arrayidx6, align 2 | ||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 20 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2 | ||
|
||
; ENABLED_MASKED_STRIDED: LV: Checking a loop in "test" | ||
; | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx6, align 2 | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, i16* %arrayidx6, align 2 | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, i16* %arrayidx6, align 2 | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, i16* %arrayidx6, align 2 | ||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 20 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2 | ||
|
||
; @test: for i in [0, 1024): if (x[i] > 0) points[3*i] = x[i]. | ||
; The single store is conditional and touches only every third i16 element of | ||
; %points. %y is declared readnone and is not referenced in the body. | ||
define void @test(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readnone %y) { | ||
entry: | ||
br label %for.body | ||
|
||
||
for.body: | ||
; %indvars.iv is the loop counter i; it starts at 0 and is incremented in %for.inc. | ||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] | ||
%arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv | ||
%0 = load i16, i16* %arrayidx, align 2 | ||
; Store only when x[i] > 0 (signed compare); otherwise go straight to the latch. | ||
%cmp1 = icmp sgt i16 %0, 0 | ||
br i1 %cmp1, label %if.then, label %for.inc | ||
|
||
||
if.then: | ||
; %1 = i * 3, the index of the only element written per iteration. | ||
%1 = mul nuw nsw i64 %indvars.iv, 3 | ||
%arrayidx6 = getelementptr inbounds i16, i16* %points, i64 %1 | ||
store i16 %0, i16* %arrayidx6, align 2 | ||
br label %for.inc | ||
|
||
||
for.inc: | ||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||
; Constant trip count: leave the loop once the incremented counter reaches 1024. | ||
%exitcond.not = icmp eq i64 %indvars.iv.next, 1024 | ||
br i1 %exitcond.not, label %for.end, label %for.body | ||
|
||
||
for.end: | ||
ret void | ||
} | ||