Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[NFC][X86][LV] Add costmodel test coverage for interleaved i64/f64 load/store stride=3
- Loading branch information
Showing
4 changed files
with
302 additions
and
0 deletions.
There are no files selected for viewing
75 changes: 75 additions & 0 deletions
75
llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,SSE2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX1 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw,+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX512 | ||
; REQUIRES: asserts | ||
|
||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
@A = global [1024 x double] zeroinitializer, align 128 | ||
@B = global [1024 x i8] zeroinitializer, align 128 | ||
|
||
; CHECK: LV: Checking a loop in "test" | ||
; | ||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, double* %in0, align 8 | ||
; SSE2: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load double, double* %in0, align 8 | ||
; SSE2: LV: Found an estimated cost of 18 for VF 4 For instruction: %v0 = load double, double* %in0, align 8 | ||
; SSE2: LV: Found an estimated cost of 36 for VF 8 For instruction: %v0 = load double, double* %in0, align 8 | ||
; | ||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, double* %in0, align 8 | ||
; AVX1: LV: Found an estimated cost of 10 for VF 2 For instruction: %v0 = load double, double* %in0, align 8 | ||
; AVX1: LV: Found an estimated cost of 24 for VF 4 For instruction: %v0 = load double, double* %in0, align 8 | ||
; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load double, double* %in0, align 8 | ||
; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: %v0 = load double, double* %in0, align 8 | ||
; | ||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, double* %in0, align 8 | ||
; AVX2: LV: Found an estimated cost of 10 for VF 2 For instruction: %v0 = load double, double* %in0, align 8 | ||
; AVX2: LV: Found an estimated cost of 24 for VF 4 For instruction: %v0 = load double, double* %in0, align 8 | ||
; AVX2: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load double, double* %in0, align 8 | ||
; AVX2: LV: Found an estimated cost of 96 for VF 16 For instruction: %v0 = load double, double* %in0, align 8 | ||
; | ||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, double* %in0, align 8 | ||
; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load double, double* %in0, align 8 | ||
; AVX512: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, double* %in0, align 8 | ||
; AVX512: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load double, double* %in0, align 8 | ||
; AVX512: LV: Found an estimated cost of 51 for VF 16 For instruction: %v0 = load double, double* %in0, align 8 | ||
; AVX512: LV: Found an estimated cost of 120 for VF 32 For instruction: %v0 = load double, double* %in0, align 8 | ||
; AVX512: LV: Found an estimated cost of 240 for VF 64 For instruction: %v0 = load double, double* %in0, align 8 | ||
; | ||
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: %v0 = load double, double* %in0, align 8 | ||
|
||
; Test loop: each iteration loads three adjacent doubles from @A (indices | ||
; %iv, %iv+1, %iv+2), sums them, converts the sum to i8 and stores it to | ||
; @B[%iv]; %iv then advances by 3. | ||
; The CHECK lines in this file match the textual form of the %v0 load, so the | ||
; value names %v0 and %in0 must not be renamed. | ||
; NOTE(review): the loop continues while %iv.next < 1024, so the final | ||
; iteration has %iv = 1023 and %in1/%in2 use indices 1024/1025 of the | ||
; 1024-element @A - confirm this is intentional before changing the bound. | ||
define void @test() { | ||
entry: | ||
br label %for.body | ||
|
||
for.body: | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] | ||
|
||
; Element indices for this iteration; %iv.0 equals %iv (adds 0), presumably | ||
; kept so all three indices are named uniformly. | ||
%iv.0 = add nuw nsw i64 %iv, 0 | ||
%iv.1 = add nuw nsw i64 %iv, 1 | ||
%iv.2 = add nuw nsw i64 %iv, 2 | ||
|
||
%in0 = getelementptr inbounds [1024 x double], [1024 x double]* @A, i64 0, i64 %iv.0 | ||
%in1 = getelementptr inbounds [1024 x double], [1024 x double]* @A, i64 0, i64 %iv.1 | ||
%in2 = getelementptr inbounds [1024 x double], [1024 x double]* @A, i64 0, i64 %iv.2 | ||
|
||
; The three adjacent loads; %v0 is the instruction the CHECK lines report on. | ||
%v0 = load double, double* %in0 | ||
%v1 = load double, double* %in1 | ||
%v2 = load double, double* %in2 | ||
|
||
; Combine all three loaded values so each of them is used. | ||
%reduce.add.0 = fadd double %v0, %v1 | ||
%reduce.add.1 = fadd double %reduce.add.0, %v2 | ||
|
||
; Narrow the sum to i8 so the store below writes one byte per iteration. | ||
%reduce.add.1.narrow = fptoui double %reduce.add.1 to i8 | ||
|
||
%out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0 | ||
store i8 %reduce.add.1.narrow, i8* %out | ||
|
||
; Step by 3 and loop while the next index is below 1024 (unsigned compare). | ||
%iv.next = add nuw nsw i64 %iv.0, 3 | ||
%cmp = icmp ult i64 %iv.next, 1024 | ||
br i1 %cmp, label %for.body, label %for.cond.cleanup | ||
|
||
for.cond.cleanup: | ||
ret void | ||
} |
75 changes: 75 additions & 0 deletions
75
llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,SSE2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX1 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw,+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX512 | ||
; REQUIRES: asserts | ||
|
||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
@A = global [1024 x i64] zeroinitializer, align 128 | ||
@B = global [1024 x i8] zeroinitializer, align 128 | ||
|
||
; CHECK: LV: Checking a loop in "test" | ||
; | ||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; SSE2: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; SSE2: LV: Found an estimated cost of 42 for VF 4 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; SSE2: LV: Found an estimated cost of 84 for VF 8 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; | ||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; AVX1: LV: Found an estimated cost of 39 for VF 4 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; AVX1: LV: Found an estimated cost of 78 for VF 8 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; AVX1: LV: Found an estimated cost of 156 for VF 16 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; | ||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; AVX2: LV: Found an estimated cost of 16 for VF 2 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; AVX2: LV: Found an estimated cost of 39 for VF 4 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; AVX2: LV: Found an estimated cost of 78 for VF 8 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; AVX2: LV: Found an estimated cost of 156 for VF 16 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; | ||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; AVX512: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; AVX512: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; AVX512: LV: Found an estimated cost of 51 for VF 16 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; AVX512: LV: Found an estimated cost of 120 for VF 32 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; AVX512: LV: Found an estimated cost of 240 for VF 64 For instruction: %v0 = load i64, i64* %in0, align 8 | ||
; | ||
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: %v0 = load i64, i64* %in0, align 8 | ||
|
||
; Test loop: each iteration loads three adjacent i64 values from @A (indices | ||
; %iv, %iv+1, %iv+2), sums them, truncates the sum to i8 and stores it to | ||
; @B[%iv]; %iv then advances by 3. | ||
; The CHECK lines in this file match the textual form of the %v0 load, so the | ||
; value names %v0 and %in0 must not be renamed. | ||
; NOTE(review): the loop continues while %iv.next < 1024, so the final | ||
; iteration has %iv = 1023 and %in1/%in2 use indices 1024/1025 of the | ||
; 1024-element @A - confirm this is intentional before changing the bound. | ||
define void @test() { | ||
entry: | ||
br label %for.body | ||
|
||
for.body: | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] | ||
|
||
; Element indices for this iteration; %iv.0 equals %iv (adds 0), presumably | ||
; kept so all three indices are named uniformly. | ||
%iv.0 = add nuw nsw i64 %iv, 0 | ||
%iv.1 = add nuw nsw i64 %iv, 1 | ||
%iv.2 = add nuw nsw i64 %iv, 2 | ||
|
||
%in0 = getelementptr inbounds [1024 x i64], [1024 x i64]* @A, i64 0, i64 %iv.0 | ||
%in1 = getelementptr inbounds [1024 x i64], [1024 x i64]* @A, i64 0, i64 %iv.1 | ||
%in2 = getelementptr inbounds [1024 x i64], [1024 x i64]* @A, i64 0, i64 %iv.2 | ||
|
||
; The three adjacent loads; %v0 is the instruction the CHECK lines report on. | ||
%v0 = load i64, i64* %in0 | ||
%v1 = load i64, i64* %in1 | ||
%v2 = load i64, i64* %in2 | ||
|
||
; Combine all three loaded values so each of them is used. | ||
%reduce.add.0 = add i64 %v0, %v1 | ||
%reduce.add.1 = add i64 %reduce.add.0, %v2 | ||
|
||
; Narrow the sum to i8 so the store below writes one byte per iteration. | ||
%reduce.add.1.narrow = trunc i64 %reduce.add.1 to i8 | ||
|
||
%out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0 | ||
store i8 %reduce.add.1.narrow, i8* %out | ||
|
||
; Step by 3 and loop while the next index is below 1024 (unsigned compare). | ||
%iv.next = add nuw nsw i64 %iv.0, 3 | ||
%cmp = icmp ult i64 %iv.next, 1024 | ||
br i1 %cmp, label %for.body, label %for.cond.cleanup | ||
|
||
for.cond.cleanup: | ||
ret void | ||
} |
76 changes: 76 additions & 0 deletions
76
llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-3.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,SSE2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX1 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw,+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX512 | ||
; REQUIRES: asserts | ||
|
||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
@A = global [1024 x i8] zeroinitializer, align 128 | ||
@B = global [1024 x double] zeroinitializer, align 128 | ||
|
||
; CHECK: LV: Checking a loop in "test" | ||
; | ||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, double* %out2, align 8 | ||
; SSE2: LV: Found an estimated cost of 10 for VF 2 For instruction: store double %v2, double* %out2, align 8 | ||
; SSE2: LV: Found an estimated cost of 20 for VF 4 For instruction: store double %v2, double* %out2, align 8 | ||
; SSE2: LV: Found an estimated cost of 40 for VF 8 For instruction: store double %v2, double* %out2, align 8 | ||
; | ||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, double* %out2, align 8 | ||
; AVX1: LV: Found an estimated cost of 11 for VF 2 For instruction: store double %v2, double* %out2, align 8 | ||
; AVX1: LV: Found an estimated cost of 27 for VF 4 For instruction: store double %v2, double* %out2, align 8 | ||
; AVX1: LV: Found an estimated cost of 54 for VF 8 For instruction: store double %v2, double* %out2, align 8 | ||
; AVX1: LV: Found an estimated cost of 108 for VF 16 For instruction: store double %v2, double* %out2, align 8 | ||
; | ||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, double* %out2, align 8 | ||
; AVX2: LV: Found an estimated cost of 11 for VF 2 For instruction: store double %v2, double* %out2, align 8 | ||
; AVX2: LV: Found an estimated cost of 27 for VF 4 For instruction: store double %v2, double* %out2, align 8 | ||
; AVX2: LV: Found an estimated cost of 54 for VF 8 For instruction: store double %v2, double* %out2, align 8 | ||
; AVX2: LV: Found an estimated cost of 108 for VF 16 For instruction: store double %v2, double* %out2, align 8 | ||
; | ||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, double* %out2, align 8 | ||
; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: store double %v2, double* %out2, align 8 | ||
; AVX512: LV: Found an estimated cost of 8 for VF 4 For instruction: store double %v2, double* %out2, align 8 | ||
; AVX512: LV: Found an estimated cost of 12 for VF 8 For instruction: store double %v2, double* %out2, align 8 | ||
; AVX512: LV: Found an estimated cost of 24 for VF 16 For instruction: store double %v2, double* %out2, align 8 | ||
; AVX512: LV: Found an estimated cost of 48 for VF 32 For instruction: store double %v2, double* %out2, align 8 | ||
; AVX512: LV: Found an estimated cost of 96 for VF 64 For instruction: store double %v2, double* %out2, align 8 | ||
; | ||
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: store double %v2, double* %out2, align 8 | ||
|
||
; Test loop: each iteration loads one i8 from @A[%iv], converts it to double, | ||
; derives three values from it (+0.0, +1.0, +2.0) and stores them to three | ||
; adjacent elements of @B (indices %iv, %iv+1, %iv+2); %iv then advances by 3. | ||
; The CHECK lines in this file match the textual form of the %v2 store, so the | ||
; value names %v2 and %out2 must not be renamed. | ||
; NOTE(review): the loop continues while %iv.next < 1024, so the final | ||
; iteration has %iv = 1023 and %out1/%out2 use indices 1024/1025 of the | ||
; 1024-element @B - confirm this is intentional before changing the bound. | ||
define void @test() { | ||
entry: | ||
br label %for.body | ||
|
||
for.body: | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] | ||
|
||
; Element indices for this iteration; %iv.0 equals %iv (adds 0), presumably | ||
; kept so all three indices are named uniformly. | ||
%iv.0 = add nuw nsw i64 %iv, 0 | ||
%iv.1 = add nuw nsw i64 %iv, 1 | ||
%iv.2 = add nuw nsw i64 %iv, 2 | ||
|
||
; Single byte-sized input per iteration. | ||
%in = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv.0 | ||
%v.narrow = load i8, i8* %in | ||
|
||
; Widen the loaded byte to the stored element type. | ||
%v = uitofp i8 %v.narrow to double | ||
|
||
; Three values derived from the same input, one per stored element. | ||
%v0 = fadd double %v, 0.0 | ||
%v1 = fadd double %v, 1.0 | ||
%v2 = fadd double %v, 2.0 | ||
|
||
%out0 = getelementptr inbounds [1024 x double], [1024 x double]* @B, i64 0, i64 %iv.0 | ||
%out1 = getelementptr inbounds [1024 x double], [1024 x double]* @B, i64 0, i64 %iv.1 | ||
%out2 = getelementptr inbounds [1024 x double], [1024 x double]* @B, i64 0, i64 %iv.2 | ||
|
||
; The three adjacent stores; the %v2 store is the one the CHECK lines report on. | ||
store double %v0, double* %out0 | ||
store double %v1, double* %out1 | ||
store double %v2, double* %out2 | ||
|
||
; Step by 3 and loop while the next index is below 1024 (unsigned compare). | ||
%iv.next = add nuw nsw i64 %iv.0, 3 | ||
%cmp = icmp ult i64 %iv.next, 1024 | ||
br i1 %cmp, label %for.body, label %for.cond.cleanup | ||
|
||
for.cond.cleanup: | ||
ret void | ||
} |
76 changes: 76 additions & 0 deletions
76
llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,SSE2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX1 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX2 | ||
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw,+avx512vl --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX512 | ||
; REQUIRES: asserts | ||
|
||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
@A = global [1024 x i8] zeroinitializer, align 128 | ||
@B = global [1024 x i64] zeroinitializer, align 128 | ||
|
||
; CHECK: LV: Checking a loop in "test" | ||
; | ||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; SSE2: LV: Found an estimated cost of 22 for VF 2 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; SSE2: LV: Found an estimated cost of 44 for VF 4 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; SSE2: LV: Found an estimated cost of 88 for VF 8 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; | ||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; AVX1: LV: Found an estimated cost of 39 for VF 4 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; AVX1: LV: Found an estimated cost of 78 for VF 8 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; AVX1: LV: Found an estimated cost of 156 for VF 16 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; | ||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; AVX2: LV: Found an estimated cost of 17 for VF 2 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; AVX2: LV: Found an estimated cost of 39 for VF 4 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; AVX2: LV: Found an estimated cost of 78 for VF 8 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; AVX2: LV: Found an estimated cost of 156 for VF 16 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; | ||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; AVX512: LV: Found an estimated cost of 8 for VF 4 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; AVX512: LV: Found an estimated cost of 12 for VF 8 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; AVX512: LV: Found an estimated cost of 24 for VF 16 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; AVX512: LV: Found an estimated cost of 48 for VF 32 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; AVX512: LV: Found an estimated cost of 96 for VF 64 For instruction: store i64 %v2, i64* %out2, align 8 | ||
; | ||
; CHECK-NOT: LV: Found an estimated cost of {{[0-9]+}} for VF {{[0-9]+}} For instruction: store i64 %v2, i64* %out2, align 8 | ||
|
||
; Test loop: each iteration loads one i8 from @A[%iv], zero-extends it to i64, | ||
; derives three values from it (+0, +1, +2) and stores them to three adjacent | ||
; elements of @B (indices %iv, %iv+1, %iv+2); %iv then advances by 3. | ||
; The CHECK lines in this file match the textual form of the %v2 store, so the | ||
; value names %v2 and %out2 must not be renamed. | ||
; NOTE(review): the loop continues while %iv.next < 1024, so the final | ||
; iteration has %iv = 1023 and %out1/%out2 use indices 1024/1025 of the | ||
; 1024-element @B - confirm this is intentional before changing the bound. | ||
define void @test() { | ||
entry: | ||
br label %for.body | ||
|
||
for.body: | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] | ||
|
||
; Element indices for this iteration; %iv.0 equals %iv (adds 0), presumably | ||
; kept so all three indices are named uniformly. | ||
%iv.0 = add nuw nsw i64 %iv, 0 | ||
%iv.1 = add nuw nsw i64 %iv, 1 | ||
%iv.2 = add nuw nsw i64 %iv, 2 | ||
|
||
; Single byte-sized input per iteration. | ||
%in = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv.0 | ||
%v.narrow = load i8, i8* %in | ||
|
||
; Widen the loaded byte to the stored element type. | ||
%v = zext i8 %v.narrow to i64 | ||
|
||
; Three values derived from the same input, one per stored element. | ||
%v0 = add i64 %v, 0 | ||
%v1 = add i64 %v, 1 | ||
%v2 = add i64 %v, 2 | ||
|
||
%out0 = getelementptr inbounds [1024 x i64], [1024 x i64]* @B, i64 0, i64 %iv.0 | ||
%out1 = getelementptr inbounds [1024 x i64], [1024 x i64]* @B, i64 0, i64 %iv.1 | ||
%out2 = getelementptr inbounds [1024 x i64], [1024 x i64]* @B, i64 0, i64 %iv.2 | ||
|
||
; The three adjacent stores; the %v2 store is the one the CHECK lines report on. | ||
store i64 %v0, i64* %out0 | ||
store i64 %v1, i64* %out1 | ||
store i64 %v2, i64* %out2 | ||
|
||
; Step by 3 and loop while the next index is below 1024 (unsigned compare). | ||
%iv.next = add nuw nsw i64 %iv.0, 3 | ||
%cmp = icmp ult i64 %iv.next, 1024 | ||
br i1 %cmp, label %for.body, label %for.cond.cleanup | ||
|
||
for.cond.cleanup: | ||
ret void | ||
} |