Skip to content

Commit

Permalink
[NFC][X86] Duplicate LV test into a costmodel test
Browse files Browse the repository at this point in the history
Copied from llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
As discussed in D111460 / D112877 / D112873, we have basically no test coverage
for this part of the cost model.
  • Loading branch information
LebedevRI committed Nov 3, 2021
1 parent 0923341 commit c6b3da1
Showing 1 changed file with 204 additions and 0 deletions.
@@ -0,0 +1,204 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; Every check in this file matches the "LV:" debug output enabled by
; --debug-only=loop-vectorize. That output is only produced by builds with
; assertions enabled, so the test must be skipped everywhere else.
; REQUIRES: asserts
; RUN: opt -mcpu=skx -S -loop-vectorize -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s -check-prefix=DISABLED_MASKED_STRIDED
; RUN: opt -mcpu=skx -S -loop-vectorize -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s -check-prefix=ENABLED_MASKED_STRIDED

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; (1) Interleave-group with factor 4, storing only 2 members out of the 4.
; Check that when we allow masked-memops to support interleave-group with gaps,
; the store is vectorized using a wide masked store, with a 1,1,0,0,1,1,0,0,... mask.
; Check that when we don't allow masked-memops to support interleave-group with gaps,
; the store is scalarized.
; The input IR was generated from this source:
; for(i=0;i<1024;i++){
; points[i*4] = x[i];
; points[i*4 + 1] = y[i];
; }
; (relates to the testcase in PR50566)

; DISABLED_MASKED_STRIDED: LV: Checking a loop in "test1"
;
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 54 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 54 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 110 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 110 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 228 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 228 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2

; ENABLED_MASKED_STRIDED: LV: Checking a loop in "test1"
;
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 9 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 20 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 40 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 96 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2

; Function test1: a single loop with a constant trip count of 1024. Each
; iteration stores x[i] to points[4*i] and y[i] to points[4*i + 1]; indices
; 4*i + 2 and 4*i + 3 are never written by this function.
; The value names and numbering (%0, %2, %arrayidx2, %arrayidx7) are matched
; literally by the check lines above, so do not rename or reorder instructions.
define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) {
entry:
br label %for.body

for.body:
; %indvars.iv is the loop counter i, starting at 0.
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; %0 = x[i]
%arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv
%0 = load i16, i16* %arrayidx, align 2
; %1 = 4*i; the first store writes x[i] to points[4*i].
%1 = shl nuw nsw i64 %indvars.iv, 2
%arrayidx2 = getelementptr inbounds i16, i16* %points, i64 %1
store i16 %0, i16* %arrayidx2, align 2
; %2 = y[i]
%arrayidx4 = getelementptr inbounds i16, i16* %y, i64 %indvars.iv
%2 = load i16, i16* %arrayidx4, align 2
; %3 = 4*i + 1: the low two bits of %1 are zero, so or-ing in 1 equals adding 1.
; The second store writes y[i] to points[4*i + 1].
%3 = or i64 %1, 1
%arrayidx7 = getelementptr inbounds i16, i16* %points, i64 %3
store i16 %2, i16* %arrayidx7, align 2
; Advance i and leave the loop once i + 1 reaches 1024.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond.not, label %for.end, label %for.body

for.end:
ret void
}

; (2) Same as above, but this time the gaps mask of the store is also And-ed with the
; fold-tail mask. If using masked memops to vectorize interleaved-group with gaps is
; not allowed, the store is scalarized and predicated.
; The input IR was generated from this source:
; for(i=0;i<numPoints;i++){
; points[i*4] = x[i];
; points[i*4 + 1] = y[i];
; }

; DISABLED_MASKED_STRIDED: LV: Checking a loop in "test2"
;
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 15 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 31 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 63 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 130 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2

; ENABLED_MASKED_STRIDED: LV: Checking a loop in "test2"
;
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 20 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 41 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 83 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2
;
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 181 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2

; Function test2: same loop body as test1 (x[i] -> points[4*i],
; y[i] -> points[4*i + 1]), but the trip count is the runtime value %numPoints
; instead of a constant. The loop is skipped entirely when %numPoints <= 0.
; The value names and numbering (%0, %2, %arrayidx2, %arrayidx7) are matched
; literally by the check lines above, so do not rename or reorder instructions.
define void @test2(i16* noalias nocapture %points, i32 %numPoints, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) {
entry:
; Guard: only enter the loop when numPoints > 0 (signed compare).
%cmp15 = icmp sgt i32 %numPoints, 0
br i1 %cmp15, label %for.body.preheader, label %for.end

for.body.preheader:
; Widen the 32-bit trip count to match the 64-bit loop counter.
%wide.trip.count = zext i32 %numPoints to i64
br label %for.body

for.body:
; %indvars.iv is the loop counter i, starting at 0.
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
; %0 = x[i]
%arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv
%0 = load i16, i16* %arrayidx, align 2
; %1 = 4*i; the first store writes x[i] to points[4*i].
%1 = shl nsw i64 %indvars.iv, 2
%arrayidx2 = getelementptr inbounds i16, i16* %points, i64 %1
store i16 %0, i16* %arrayidx2, align 2
; %2 = y[i]
%arrayidx4 = getelementptr inbounds i16, i16* %y, i64 %indvars.iv
%2 = load i16, i16* %arrayidx4, align 2
; %3 = 4*i + 1: the low two bits of %1 are zero, so or-ing in 1 equals adding 1.
; The second store writes y[i] to points[4*i + 1].
%3 = or i64 %1, 1
%arrayidx7 = getelementptr inbounds i16, i16* %points, i64 %3
store i16 %2, i16* %arrayidx7, align 2
; Advance i and leave the loop once i + 1 equals the widened trip count.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond.not, label %for.end.loopexit, label %for.body

for.end.loopexit:
br label %for.end

for.end:
ret void
}

; (3) Testing a scenario of a conditional store. The gaps mask of the store is also
; And-ed with the condition mask (x[i] > 0).
; If using masked memops to vectorize interleaved-group with gaps is
; not allowed, the store is scalarized and predicated.
; Here the interleave-group has factor 3, storing only 1 member out of the 3.
; The input IR was generated from this source:
; for(i=0;i<1024;i++){
; if (x[i] > 0)
; points[i*3] = x[i];
; }

; DISABLED_MASKED_STRIDED: LV: Checking a loop in "test"
;
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx6, align 2
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, i16* %arrayidx6, align 2
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, i16* %arrayidx6, align 2
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, i16* %arrayidx6, align 2
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 20 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2

; ENABLED_MASKED_STRIDED: LV: Checking a loop in "test"
;
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx6, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, i16* %arrayidx6, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, i16* %arrayidx6, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, i16* %arrayidx6, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 20 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2

; Function test: a loop with a constant trip count of 1024 that stores x[i] to
; points[3*i] only when x[i] > 0 (signed compare). The %y argument is not
; referenced anywhere in the body.
; The value names (%0, %arrayidx6) are matched literally by the check lines
; above, so do not rename or reorder instructions.
define void @test(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readnone %y) {
entry:
br label %for.body

for.body:
; %indvars.iv is the loop counter i, starting at 0; the back edge comes from %for.inc.
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
; %0 = x[i]
%arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv
%0 = load i16, i16* %arrayidx, align 2
; Only take the store path when x[i] > 0; otherwise go straight to the increment.
%cmp1 = icmp sgt i16 %0, 0
br i1 %cmp1, label %if.then, label %for.inc

if.then:
; %1 = 3*i; the conditional store writes x[i] to points[3*i].
%1 = mul nuw nsw i64 %indvars.iv, 3
%arrayidx6 = getelementptr inbounds i16, i16* %points, i64 %1
store i16 %0, i16* %arrayidx6, align 2
br label %for.inc

for.inc:
; Advance i and leave the loop once i + 1 reaches 1024.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond.not, label %for.end, label %for.body

for.end:
ret void
}

0 comments on commit c6b3da1

Please sign in to comment.