Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[NFC][X86][LV] Add basic costmodel test coverage for not-fully-interl…
…eaved i32 loads The coverage could have cumulative explosion here, so i'm adding only the most basic cases, and hoping it's enough, though more can be added if needed.
- Loading branch information
Showing
6 changed files
with
441 additions
and
0 deletions.
There are no files selected for viewing
71 changes: 71 additions & 0 deletions
71
llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,SSE2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX1 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw,+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX512 | ||
; REQUIRES: asserts | ||
|
||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
@A = global [1024 x i32] zeroinitializer, align 128 | ||
@B = global [1024 x i8] zeroinitializer, align 128 | ||
|
||
; CHECK: LV: Checking a loop in "test" | ||
; | ||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; SSE2: LV: Found an estimated cost of 29 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; SSE2: LV: Found an estimated cost of 59 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; SSE2: LV: Found an estimated cost of 118 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; SSE2: LV: Found an estimated cost of 236 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; | ||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 110 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 220 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 440 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; | ||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 54 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 110 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 220 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 440 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; | ||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; | ||
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: %v0 = load i32, i32* %in0, align 2 | ||
|
||
define void @test() { | ||
entry: | ||
br label %for.body | ||
|
||
for.body: | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] | ||
|
||
%iv.0 = add nuw nsw i64 %iv, 0 | ||
|
||
%in0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv.0 | ||
|
||
%v0 = load i32, i32* %in0 | ||
|
||
%reduce.add.0 = add i32 %v0, 0 | ||
|
||
%reduce.add.0.narrow = trunc i32 %reduce.add.0 to i8 | ||
|
||
%out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0 | ||
store i8 %reduce.add.0.narrow, i8* %out | ||
|
||
%iv.next = add nuw nsw i64 %iv.0, 2 | ||
%cmp = icmp ult i64 %iv.next, 1024 | ||
br i1 %cmp, label %for.body, label %for.cond.cleanup | ||
|
||
for.cond.cleanup: | ||
ret void | ||
} |
75 changes: 75 additions & 0 deletions
75
llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,SSE2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX1 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw,+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX512 | ||
; REQUIRES: asserts | ||
|
||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
@A = global [1024 x i32] zeroinitializer, align 128 | ||
@B = global [1024 x i8] zeroinitializer, align 128 | ||
|
||
; CHECK: LV: Checking a loop in "test" | ||
; | ||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; SSE2: LV: Found an estimated cost of 29 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; SSE2: LV: Found an estimated cost of 59 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; SSE2: LV: Found an estimated cost of 118 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; SSE2: LV: Found an estimated cost of 236 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; | ||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 110 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 220 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 440 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; | ||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 54 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 110 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 220 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 440 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; | ||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; | ||
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: %v0 = load i32, i32* %in0, align 4 | ||
|
||
define void @test() { | ||
entry: | ||
br label %for.body | ||
|
||
for.body: | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] | ||
|
||
%iv.0 = add nuw nsw i64 %iv, 0 | ||
%iv.1 = add nuw nsw i64 %iv, 1 | ||
|
||
%in0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv.0 | ||
%in1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv.1 | ||
|
||
%v0 = load i32, i32* %in0 | ||
%v1 = load i32, i32* %in1 | ||
|
||
%reduce.add.0 = add i32 %v0, %v1 | ||
%reduce.add.1 = add i32 %reduce.add.0, 0 | ||
|
||
%reduce.add.1.narrow = trunc i32 %reduce.add.1 to i8 | ||
|
||
%out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0 | ||
store i8 %reduce.add.1.narrow, i8* %out | ||
|
||
%iv.next = add nuw nsw i64 %iv.0, 3 | ||
%cmp = icmp ult i64 %iv.next, 1024 | ||
br i1 %cmp, label %for.body, label %for.cond.cleanup | ||
|
||
for.cond.cleanup: | ||
ret void | ||
} |
70 changes: 70 additions & 0 deletions
70
llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,SSE2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX1 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw,+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX512 | ||
; REQUIRES: asserts | ||
|
||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
@A = global [1024 x i32] zeroinitializer, align 128 | ||
@B = global [1024 x i8] zeroinitializer, align 128 | ||
|
||
; CHECK: LV: Checking a loop in "test" | ||
; | ||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; SSE2: LV: Found an estimated cost of 29 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; SSE2: LV: Found an estimated cost of 59 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; SSE2: LV: Found an estimated cost of 118 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; SSE2: LV: Found an estimated cost of 236 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; | ||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 110 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 220 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 440 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; | ||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 54 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 110 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 220 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 440 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; | ||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
|
||
define void @test() { | ||
entry: | ||
br label %for.body | ||
|
||
for.body: | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] | ||
|
||
%iv.0 = add nuw nsw i64 %iv, 0 | ||
|
||
%in0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv.0 | ||
|
||
%v0 = load i32, i32* %in0 | ||
|
||
%reduce.add.0 = add i32 %v0, 0 | ||
%reduce.add.1 = add i32 %reduce.add.0, 0 | ||
|
||
%reduce.add.1.narrow = trunc i32 %reduce.add.1 to i8 | ||
|
||
%out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0 | ||
store i8 %reduce.add.1.narrow, i8* %out | ||
|
||
%iv.next = add nuw nsw i64 %iv.0, 3 | ||
%cmp = icmp ult i64 %iv.next, 1024 | ||
br i1 %cmp, label %for.body, label %for.cond.cleanup | ||
|
||
for.cond.cleanup: | ||
ret void | ||
} |
76 changes: 76 additions & 0 deletions
76
llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,SSE2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX1 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw,+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX512 | ||
; REQUIRES: asserts | ||
|
||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
@A = global [1024 x i32] zeroinitializer, align 128 | ||
@B = global [1024 x i8] zeroinitializer, align 128 | ||
|
||
; CHECK: LV: Checking a loop in "test" | ||
; | ||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; SSE2: LV: Found an estimated cost of 29 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; SSE2: LV: Found an estimated cost of 59 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; SSE2: LV: Found an estimated cost of 118 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; | ||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 110 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX1: LV: Found an estimated cost of 220 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; | ||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 54 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 110 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX2: LV: Found an estimated cost of 220 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; | ||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i32, i32* %in0, align 4 | ||
; | ||
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: %v0 = load i32, i32* %in0, align 4 | ||
|
||
define void @test() { | ||
entry: | ||
br label %for.body | ||
|
||
for.body: | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] | ||
|
||
%iv.0 = add nuw nsw i64 %iv, 0 | ||
%iv.1 = add nuw nsw i64 %iv, 1 | ||
%iv.2 = add nuw nsw i64 %iv, 2 | ||
|
||
%in0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv.0 | ||
%in1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv.1 | ||
%in2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv.2 | ||
|
||
%v0 = load i32, i32* %in0 | ||
%v1 = load i32, i32* %in1 | ||
%v2 = load i32, i32* %in2 | ||
|
||
%reduce.add.0 = add i32 %v0, %v1 | ||
%reduce.add.1 = add i32 %reduce.add.0, %v2 | ||
%reduce.add.2 = add i32 %reduce.add.1, 0 | ||
|
||
%reduce.add.2.narrow = trunc i32 %reduce.add.2 to i8 | ||
|
||
%out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0 | ||
store i8 %reduce.add.2.narrow, i8* %out | ||
|
||
%iv.next = add nuw nsw i64 %iv.0, 4 | ||
%cmp = icmp ult i64 %iv.next, 1024 | ||
br i1 %cmp, label %for.body, label %for.cond.cleanup | ||
|
||
for.cond.cleanup: | ||
ret void | ||
} |
Oops, something went wrong.