Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[NFC][X86][LV] Add costmodel test coverage for interleaved i32 load/store stride=2
- Loading branch information
Showing
2 changed files
with
149 additions
and
0 deletions.
There are no files selected for viewing
74 changes: 74 additions & 0 deletions
74
llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
; Test driver lines: opt's loop vectorizer is invoked four times, once per X86 | ||
; feature level (+sse2, +avx, +avx2, +avx512bw,+avx512vl), each time with | ||
; -vectorizer-maximize-bandwidth. stderr is merged into stdout (2>&1) so the | ||
; loop-vectorize debug output can be matched by FileCheck using the common | ||
; prefix plus one feature-specific prefix. | ||
; NOTE(review): the asserts requirement below is presumably there because | ||
; --debug-only output is only produced by assertion-enabled builds - confirm. | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,SSE2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX1 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw,+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX512 | ||
; REQUIRES: asserts | ||
|
||
; Fixed data layout and x86_64 Linux triple for every invocation above. | ||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
; @A is the 1024-element i32 array the loop loads from; @B is the | ||
; 1024-element i8 array it stores to. Both are zero-initialized and | ||
; 128-byte aligned. | ||
@A = global [1024 x i32] zeroinitializer, align 128 | ||
@B = global [1024 x i8] zeroinitializer, align 128 | ||
|
||
; Expected loop-vectorize debug output for the load %v0: one estimated-cost | ||
; line per vectorization factor (VF), grouped by the feature-specific | ||
; FileCheck prefix selected on the invocation lines. | ||
; CHECK: LV: Checking a loop in "test" | ||
; | ||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; SSE2: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; SSE2: LV: Found an estimated cost of 30 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; SSE2: LV: Found an estimated cost of 60 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; SSE2: LV: Found an estimated cost of 120 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; | ||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 21 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 46 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 92 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 184 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; | ||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 21 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 46 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 92 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 184 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; | ||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 22 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 92 for VF 64 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; | ||
; Negative guard: no further cost line for %v0 may appear after the last | ||
; expected VF. The load is reported with "align 4" in every positive | ||
; expectation above, so the negative pattern has to say align 4 too; spelled | ||
; with align 2 it could never match and the guard verified nothing. | ||
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: %v0 = load i32, i32* %in0, align 4 | ||
|
||
; Scalar loop that the vectorizer is asked to cost. | ||
; Each iteration computes B[iv] = trunc_to_i8(A[iv] + A[iv + 1]) and then | ||
; advances iv by 2; the loop repeats while the advanced iv is (unsigned) | ||
; below 1024. The function takes no arguments and returns nothing. | ||
define void @test() { | ||
entry: | ||
br label %for.body | ||
|
||
for.body: | ||
; Induction variable: 0 on entry, %iv.next on the back edge. | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] | ||
|
||
; Element indices of the two adjacent loads: iv + 0 and iv + 1. | ||
%iv.0 = add nuw nsw i64 %iv, 0 | ||
%iv.1 = add nuw nsw i64 %iv, 1 | ||
|
||
; Addresses of A[iv] and A[iv + 1]. | ||
%in0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv.0 | ||
%in1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv.1 | ||
|
||
; The two i32 loads; %v0 is the instruction whose per-VF cost the test's | ||
; expectations match. No explicit alignment is written here. | ||
%v0 = load i32, i32* %in0 | ||
%v1 = load i32, i32* %in1 | ||
|
||
; Combine both loaded values so each load has a use. | ||
%reduce.add.0 = add i32 %v0, %v1 | ||
|
||
; Narrow the sum to i8 before storing. | ||
%reduce.add.0.narrow = trunc i32 %reduce.add.0 to i8 | ||
|
||
; Store the narrowed sum to B[iv]. | ||
%out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0 | ||
store i8 %reduce.add.0.narrow, i8* %out | ||
|
||
; Step by 2 and loop while the new index is unsigned-less-than 1024. | ||
%iv.next = add nuw nsw i64 %iv.0, 2 | ||
%cmp = icmp ult i64 %iv.next, 1024 | ||
br i1 %cmp, label %for.body, label %for.cond.cleanup | ||
|
||
for.cond.cleanup: | ||
ret void | ||
} |
75 changes: 75 additions & 0 deletions
75
llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
; Test driver lines: opt's loop vectorizer is invoked four times, once per X86 | ||
; feature level (+sse2, +avx, +avx2, +avx512bw,+avx512vl), each time with | ||
; -vectorizer-maximize-bandwidth. stderr is merged into stdout (2>&1) so the | ||
; loop-vectorize debug output can be matched by FileCheck using the common | ||
; prefix plus one feature-specific prefix. | ||
; NOTE(review): the asserts requirement below is presumably there because | ||
; --debug-only output is only produced by assertion-enabled builds - confirm. | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,SSE2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX1 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw,+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX512 | ||
; REQUIRES: asserts | ||
|
||
; Fixed data layout and x86_64 Linux triple for every invocation above. | ||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
; @A is the 1024-element i8 array the loop loads from; @B is the | ||
; 1024-element i32 array it stores to. Both are zero-initialized and | ||
; 128-byte aligned. | ||
@A = global [1024 x i8] zeroinitializer, align 128 | ||
@B = global [1024 x i32] zeroinitializer, align 128 | ||
|
||
; Expected loop-vectorize debug output for the store of %v1 to %out1: one | ||
; estimated-cost line per vectorization factor (VF), grouped by the | ||
; feature-specific FileCheck prefix selected on the invocation lines. | ||
; CHECK: LV: Checking a loop in "test" | ||
; | ||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; SSE2: LV: Found an estimated cost of 14 for VF 2 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; SSE2: LV: Found an estimated cost of 30 for VF 4 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; SSE2: LV: Found an estimated cost of 60 for VF 8 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; SSE2: LV: Found an estimated cost of 120 for VF 16 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; | ||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; AVX1: LV: Found an estimated cost of 9 for VF 2 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; AVX1: LV: Found an estimated cost of 19 for VF 4 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; AVX1: LV: Found an estimated cost of 46 for VF 8 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; AVX1: LV: Found an estimated cost of 92 for VF 16 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; AVX1: LV: Found an estimated cost of 184 for VF 32 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; | ||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; AVX2: LV: Found an estimated cost of 9 for VF 2 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; AVX2: LV: Found an estimated cost of 19 for VF 4 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; AVX2: LV: Found an estimated cost of 46 for VF 8 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; AVX2: LV: Found an estimated cost of 92 for VF 16 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; AVX2: LV: Found an estimated cost of 184 for VF 32 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; | ||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; AVX512: LV: Found an estimated cost of 2 for VF 8 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; AVX512: LV: Found an estimated cost of 5 for VF 16 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; AVX512: LV: Found an estimated cost of 10 for VF 32 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; AVX512: LV: Found an estimated cost of 20 for VF 64 For instruction: store i32 %v1, i32* %out1, align 4 | ||
; | ||
; Negative guard: no further cost line for this store may appear after the | ||
; last expected VF. The store is reported with "align 4" in every positive | ||
; expectation above, so the negative pattern has to say align 4 too; spelled | ||
; with align 2 it could never match and the guard verified nothing. | ||
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: store i32 %v1, i32* %out1, align 4 | ||
|
||
; Scalar loop that the vectorizer is asked to cost. | ||
; Each iteration loads the i8 value A[iv], zero-extends it to i32 as v, then | ||
; stores v + 0 to B[iv] and v + 1 to B[iv + 1], and advances iv by 2; the | ||
; loop repeats while the advanced iv is (unsigned) below 1024. The function | ||
; takes no arguments and returns nothing. | ||
define void @test() { | ||
entry: | ||
br label %for.body | ||
|
||
for.body: | ||
; Induction variable: 0 on entry, %iv.next on the back edge. | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] | ||
|
||
; Element indices of the two adjacent stores: iv + 0 and iv + 1. | ||
%iv.0 = add nuw nsw i64 %iv, 0 | ||
%iv.1 = add nuw nsw i64 %iv, 1 | ||
|
||
; Load the narrow source value A[iv]. | ||
%in = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv.0 | ||
%v.narrow = load i8, i8* %in | ||
|
||
; Widen to the i32 element type that gets stored. | ||
%v = zext i8 %v.narrow to i32 | ||
|
||
; Two values derived from the same input, one per stored element. | ||
%v0 = add i32 %v, 0 | ||
%v1 = add i32 %v, 1 | ||
|
||
; Addresses of B[iv] and B[iv + 1]. | ||
%out0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %iv.0 | ||
%out1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %iv.1 | ||
|
||
; The two i32 stores; the store of %v1 to %out1 is the instruction whose | ||
; per-VF cost the test's expectations match. No explicit alignment is | ||
; written here. | ||
store i32 %v0, i32* %out0 | ||
store i32 %v1, i32* %out1 | ||
|
||
; Step by 2 and loop while the new index is unsigned-less-than 1024. | ||
%iv.next = add nuw nsw i64 %iv.0, 2 | ||
%cmp = icmp ult i64 %iv.next, 1024 | ||
br i1 %cmp, label %for.body, label %for.cond.cleanup | ||
|
||
for.cond.cleanup: | ||
ret void | ||
} |