Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[NFC][X86][Costmodel] Add some more interleaved load/store tests with i16 element type
Not sure if even larger interleaving factors are needed, but these are what I have seen being queried in the wild.
- Loading branch information
Showing
10 changed files
with
294 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
59 changes: 59 additions & 0 deletions
59
llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s | ||
; REQUIRES: asserts | ||
; | ||
; Cost-model test for the loop vectorizer. The run line above enables AVX2 and | ||
; pipes the pass's debug output (stderr merged into stdout) to FileCheck, so the | ||
; check lines below match the "estimated cost" messages printed per VF. | ||
; The requires line limits the test to builds that provide the "asserts" feature | ||
; (presumably because the debug-only output is only available there; confirm). | ||
|
||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
; @A: 1024 x i16 array the loop loads from; @B: 1024 x i8 array it stores to. | ||
@A = global [1024 x i16] zeroinitializer, align 128 | ||
@B = global [1024 x i8] zeroinitializer, align 128 | ||
|
||
; Expected costs reported for the first of the five loads (%v0) at VF 1, 2, 4, | ||
; 8 and 16. The trailing negative check requires that no further cost line for | ||
; this instruction follows the VF 16 one. | ||
; CHECK: LV: Checking a loop in "test" | ||
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, i16* %in0, align 2 | ||
; CHECK: LV: Found an estimated cost of 26 for VF 2 For instruction: %v0 = load i16, i16* %in0, align 2 | ||
; CHECK: LV: Found an estimated cost of 50 for VF 4 For instruction: %v0 = load i16, i16* %in0, align 2 | ||
; CHECK: LV: Found an estimated cost of 99 for VF 8 For instruction: %v0 = load i16, i16* %in0, align 2 | ||
; CHECK: LV: Found an estimated cost of 285 for VF 16 For instruction: %v0 = load i16, i16* %in0, align 2 | ||
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: %v0 = load i16, i16* %in0, align 2 | ||
|
||
; Scalar loop analyzed by the vectorizer: each iteration reads five adjacent i16 | ||
; elements of @A, sums them, truncates the sum to i8 and stores it to @B. | ||
define void @test() { | ||
entry: | ||
br label %for.body | ||
|
||
||
for.body: | ||
; Base index: starts at 0 and is replaced by %iv.next (base + 5) on each back-edge. | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] | ||
|
||
||
; Indices of the five group members (%iv.0 adds zero, so it equals %iv). | ||
%iv.0 = add nuw nsw i64 %iv, 0 | ||
%iv.1 = add nuw nsw i64 %iv, 1 | ||
%iv.2 = add nuw nsw i64 %iv, 2 | ||
%iv.3 = add nuw nsw i64 %iv, 3 | ||
%iv.4 = add nuw nsw i64 %iv, 4 | ||
|
||
||
; Addresses of @A[iv+0] .. @A[iv+4]. | ||
%in0 = getelementptr inbounds [1024 x i16], [1024 x i16]* @A, i64 0, i64 %iv.0 | ||
%in1 = getelementptr inbounds [1024 x i16], [1024 x i16]* @A, i64 0, i64 %iv.1 | ||
%in2 = getelementptr inbounds [1024 x i16], [1024 x i16]* @A, i64 0, i64 %iv.2 | ||
%in3 = getelementptr inbounds [1024 x i16], [1024 x i16]* @A, i64 0, i64 %iv.3 | ||
%in4 = getelementptr inbounds [1024 x i16], [1024 x i16]* @A, i64 0, i64 %iv.4 | ||
|
||
||
; The five loads; the %v0 load is the instruction whose cost the test pins. | ||
%v0 = load i16, i16* %in0 | ||
%v1 = load i16, i16* %in1 | ||
%v2 = load i16, i16* %in2 | ||
%v3 = load i16, i16* %in3 | ||
%v4 = load i16, i16* %in4 | ||
|
||
||
; Chain of adds combining all five loaded values so every load has a use. | ||
%reduce.add.0 = add i16 %v0, %v1 | ||
%reduce.add.1 = add i16 %reduce.add.0, %v2 | ||
%reduce.add.2 = add i16 %reduce.add.1, %v3 | ||
%reduce.add.3 = add i16 %reduce.add.2, %v4 | ||
|
||
||
; Narrow the i16 sum to the i8 element type of @B. | ||
%reduce.add.3.narrow = trunc i16 %reduce.add.3 to i8 | ||
|
||
||
; One i8 result is written per iteration, at @B[iv]. | ||
%out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0 | ||
store i8 %reduce.add.3.narrow, i8* %out | ||
|
||
||
; Advance by the group size (5) and loop while the next base index is below 1024. | ||
; NOTE(review): the last iteration runs with %iv = 1020, so %in4 addresses index | ||
; 1024, one past the end of the 1024-element @A -- confirm this is acceptable | ||
; for a cost-only test. | ||
%iv.next = add nuw nsw i64 %iv.0, 5 | ||
%cmp = icmp ult i64 %iv.next, 1024 | ||
br i1 %cmp, label %for.body, label %for.cond.cleanup | ||
|
||
||
for.cond.cleanup: | ||
ret void | ||
} |
63 changes: 63 additions & 0 deletions
63
llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s | ||
; REQUIRES: asserts | ||
; | ||
; Cost-model test for the loop vectorizer. The run line above enables AVX2 and | ||
; pipes the pass's debug output (stderr merged into stdout) to FileCheck, so the | ||
; check lines below match the "estimated cost" messages printed per VF. | ||
; The requires line limits the test to builds that provide the "asserts" feature | ||
; (presumably because the debug-only output is only available there; confirm). | ||
|
||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
; @A: 1024 x i16 array the loop loads from; @B: 1024 x i8 array it stores to. | ||
@A = global [1024 x i16] zeroinitializer, align 128 | ||
@B = global [1024 x i8] zeroinitializer, align 128 | ||
|
||
; Expected costs reported for the first of the six loads (%v0) at VF 1, 2, 4, | ||
; 8 and 16. The trailing negative check requires that no further cost line for | ||
; this instruction follows the VF 16 one. | ||
; CHECK: LV: Checking a loop in "test" | ||
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, i16* %in0, align 2 | ||
; CHECK: LV: Found an estimated cost of 31 for VF 2 For instruction: %v0 = load i16, i16* %in0, align 2 | ||
; CHECK: LV: Found an estimated cost of 58 for VF 4 For instruction: %v0 = load i16, i16* %in0, align 2 | ||
; CHECK: LV: Found an estimated cost of 123 for VF 8 For instruction: %v0 = load i16, i16* %in0, align 2 | ||
; CHECK: LV: Found an estimated cost of 342 for VF 16 For instruction: %v0 = load i16, i16* %in0, align 2 | ||
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: %v0 = load i16, i16* %in0, align 2 | ||
|
||
; Scalar loop analyzed by the vectorizer: each iteration reads six adjacent i16 | ||
; elements of @A, sums them, truncates the sum to i8 and stores it to @B. | ||
define void @test() { | ||
entry: | ||
br label %for.body | ||
|
||
||
for.body: | ||
; Base index: starts at 0 and is replaced by %iv.next (base + 6) on each back-edge. | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] | ||
|
||
||
; Indices of the six group members (%iv.0 adds zero, so it equals %iv). | ||
%iv.0 = add nuw nsw i64 %iv, 0 | ||
%iv.1 = add nuw nsw i64 %iv, 1 | ||
%iv.2 = add nuw nsw i64 %iv, 2 | ||
%iv.3 = add nuw nsw i64 %iv, 3 | ||
%iv.4 = add nuw nsw i64 %iv, 4 | ||
%iv.5 = add nuw nsw i64 %iv, 5 | ||
|
||
||
; Addresses of @A[iv+0] .. @A[iv+5]. | ||
%in0 = getelementptr inbounds [1024 x i16], [1024 x i16]* @A, i64 0, i64 %iv.0 | ||
%in1 = getelementptr inbounds [1024 x i16], [1024 x i16]* @A, i64 0, i64 %iv.1 | ||
%in2 = getelementptr inbounds [1024 x i16], [1024 x i16]* @A, i64 0, i64 %iv.2 | ||
%in3 = getelementptr inbounds [1024 x i16], [1024 x i16]* @A, i64 0, i64 %iv.3 | ||
%in4 = getelementptr inbounds [1024 x i16], [1024 x i16]* @A, i64 0, i64 %iv.4 | ||
%in5 = getelementptr inbounds [1024 x i16], [1024 x i16]* @A, i64 0, i64 %iv.5 | ||
|
||
||
; The six loads; the %v0 load is the instruction whose cost the test pins. | ||
%v0 = load i16, i16* %in0 | ||
%v1 = load i16, i16* %in1 | ||
%v2 = load i16, i16* %in2 | ||
%v3 = load i16, i16* %in3 | ||
%v4 = load i16, i16* %in4 | ||
%v5 = load i16, i16* %in5 | ||
|
||
||
; Chain of adds combining all six loaded values so every load has a use. | ||
%reduce.add.0 = add i16 %v0, %v1 | ||
%reduce.add.1 = add i16 %reduce.add.0, %v2 | ||
%reduce.add.2 = add i16 %reduce.add.1, %v3 | ||
%reduce.add.3 = add i16 %reduce.add.2, %v4 | ||
%reduce.add.4 = add i16 %reduce.add.3, %v5 | ||
|
||
||
; Narrow the i16 sum to the i8 element type of @B. | ||
%reduce.add.4.narrow = trunc i16 %reduce.add.4 to i8 | ||
|
||
||
; One i8 result is written per iteration, at @B[iv]. | ||
%out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0 | ||
store i8 %reduce.add.4.narrow, i8* %out | ||
|
||
||
; Advance by the group size (6) and loop while the next base index is below 1024. | ||
; NOTE(review): the last iteration runs with %iv = 1020, so %in4 and %in5 address | ||
; indices 1024 and 1025, past the end of the 1024-element @A -- confirm this is | ||
; acceptable for a cost-only test. | ||
%iv.next = add nuw nsw i64 %iv.0, 6 | ||
%cmp = icmp ult i64 %iv.next, 1024 | ||
br i1 %cmp, label %for.body, label %for.cond.cleanup | ||
|
||
||
for.cond.cleanup: | ||
ret void | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
60 changes: 60 additions & 0 deletions
60
llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s | ||
; REQUIRES: asserts | ||
; | ||
; Cost-model test for the loop vectorizer. The run line above enables AVX2 and | ||
; pipes the pass's debug output (stderr merged into stdout) to FileCheck, so the | ||
; check lines below match the "estimated cost" messages printed per VF. | ||
; The requires line limits the test to builds that provide the "asserts" feature | ||
; (presumably because the debug-only output is only available there; confirm). | ||
|
||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
; @A: 1024 x i8 array the loop loads from; @B: 1024 x i16 array it stores to. | ||
@A = global [1024 x i8] zeroinitializer, align 128 | ||
@B = global [1024 x i16] zeroinitializer, align 128 | ||
|
||
; Expected costs reported for the last of the five stores (%v4 to %out4) at | ||
; VF 1, 2, 4, 8 and 16. The trailing negative check requires that no further | ||
; cost line for this instruction follows the VF 16 one. | ||
; CHECK: LV: Checking a loop in "test" | ||
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, i16* %out4, align 2 | ||
; CHECK: LV: Found an estimated cost of 28 for VF 2 For instruction: store i16 %v4, i16* %out4, align 2 | ||
; CHECK: LV: Found an estimated cost of 58 for VF 4 For instruction: store i16 %v4, i16* %out4, align 2 | ||
; CHECK: LV: Found an estimated cost of 115 for VF 8 For instruction: store i16 %v4, i16* %out4, align 2 | ||
; CHECK: LV: Found an estimated cost of 285 for VF 16 For instruction: store i16 %v4, i16* %out4, align 2 | ||
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: store i16 %v4, i16* %out4, align 2 | ||
|
||
; Scalar loop analyzed by the vectorizer: each iteration loads one i8 from @A, | ||
; widens it to i16, derives five values from it and stores them to five adjacent | ||
; i16 elements of @B. | ||
define void @test() { | ||
entry: | ||
br label %for.body | ||
|
||
||
for.body: | ||
; Base index: starts at 0 and is replaced by %iv.next (base + 5) on each back-edge. | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] | ||
|
||
||
; Indices of the five group members (%iv.0 adds zero, so it equals %iv). | ||
%iv.0 = add nuw nsw i64 %iv, 0 | ||
%iv.1 = add nuw nsw i64 %iv, 1 | ||
%iv.2 = add nuw nsw i64 %iv, 2 | ||
%iv.3 = add nuw nsw i64 %iv, 3 | ||
%iv.4 = add nuw nsw i64 %iv, 4 | ||
|
||
||
; Single i8 input per iteration, read from @A[iv]. | ||
%in = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv.0 | ||
%v.narrow = load i8, i8* %in | ||
|
||
||
; Zero-extend to the i16 element type of @B. | ||
%v = zext i8 %v.narrow to i16 | ||
|
||
||
; Five stored values: the widened input plus 0, 1, 2, 3 and 4. | ||
%v0 = add i16 %v, 0 | ||
%v1 = add i16 %v, 1 | ||
%v2 = add i16 %v, 2 | ||
%v3 = add i16 %v, 3 | ||
%v4 = add i16 %v, 4 | ||
|
||
||
; Addresses of @B[iv+0] .. @B[iv+4]. | ||
%out0 = getelementptr inbounds [1024 x i16], [1024 x i16]* @B, i64 0, i64 %iv.0 | ||
%out1 = getelementptr inbounds [1024 x i16], [1024 x i16]* @B, i64 0, i64 %iv.1 | ||
%out2 = getelementptr inbounds [1024 x i16], [1024 x i16]* @B, i64 0, i64 %iv.2 | ||
%out3 = getelementptr inbounds [1024 x i16], [1024 x i16]* @B, i64 0, i64 %iv.3 | ||
%out4 = getelementptr inbounds [1024 x i16], [1024 x i16]* @B, i64 0, i64 %iv.4 | ||
|
||
||
; The five stores; the %v4 store is the instruction whose cost the test pins. | ||
store i16 %v0, i16* %out0 | ||
store i16 %v1, i16* %out1 | ||
store i16 %v2, i16* %out2 | ||
store i16 %v3, i16* %out3 | ||
store i16 %v4, i16* %out4 | ||
|
||
||
; Advance by the group size (5) and loop while the next base index is below 1024. | ||
; NOTE(review): the last iteration runs with %iv = 1020, so %out4 addresses index | ||
; 1024, one past the end of the 1024-element @B -- confirm this is acceptable | ||
; for a cost-only test. | ||
%iv.next = add nuw nsw i64 %iv.0, 5 | ||
%cmp = icmp ult i64 %iv.next, 1024 | ||
br i1 %cmp, label %for.body, label %for.cond.cleanup | ||
|
||
||
for.cond.cleanup: | ||
ret void | ||
} |
Oops, something went wrong.