Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[TTI][X86] update costs of interleaved load\store of i64\double
This patch contains more accurate costs of interleaved load\store of stride 2 for the types int64\double on AVX2. Reviewers: delena, RKSimon, craig.topper, dorit Reviewed By: dorit Differential Revision: https://reviews.llvm.org/D40008 llvm-svn: 318385
- Loading branch information
Mohammed Agabaria
committed
Nov 16, 2017
1 parent
fe3eda9
commit 6e6d532
Showing
3 changed files
with
86 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
40 changes: 40 additions & 0 deletions
40
llvm/test/Analysis/CostModel/X86/interleaved-load-store-double.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
; Cost-model test: runs the loop vectorizer with its debug output enabled and | ||
; lets FileCheck verify the cost reported for the stride-2 double accesses in | ||
; the function below. | ||
; NOTE(review): the assertions requirement is presumably there because the | ||
; -debug-only option is only available in assertion-enabled builds - confirm. | ||
; REQUIRES: asserts | ||
; RUN: opt -S -loop-vectorize -debug-only=loop-vectorize -mcpu=skylake %s 2>&1 | FileCheck %s | ||
; 32-bit x86 target: pointers are 32 bits wide (p:32:32 in the datalayout). | ||
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" | ||
target triple = "i386-unknown-linux-gnu" | ||
|
||
; Source and destination arrays of 120 doubles, zero-initialized, align 4. | ||
@doublesrc = common local_unnamed_addr global [120 x double] zeroinitializer, align 4 | ||
@doubledst = common local_unnamed_addr global [120 x double] zeroinitializer, align 4 | ||
|
||
; Copies elements of @doublesrc into @doubledst two adjacent elements per | ||
; iteration (indices i and i + 1). The index starts at 0 and advances by 2; | ||
; the loop repeats while the advanced index is less than %width_ and is | ||
; skipped entirely when %width_ is not greater than 0. | ||
; %k is not referenced anywhere in the body. | ||
; Function Attrs: norecurse nounwind | ||
define void @stride2double(double %k, i32 %width_) { | ||
entry: | ||
|
||
; Expected lines in the loop vectorizer's debug output: at vectorization | ||
; factor 4 both the double load and the double store are costed at 8. | ||
; CHECK: Found an estimated cost of 8 for VF 4 For instruction: %0 = load double | ||
; CHECK: Found an estimated cost of 8 for VF 4 For instruction: store double | ||
|
||
; Guard: only enter the loop when %width_ > 0 (signed compare). | ||
%cmp27 = icmp sgt i32 %width_, 0 | ||
br i1 %cmp27, label %for.body.lr.ph, label %for.cond.cleanup | ||
|
||
for.body.lr.ph: ; preds = %entry | ||
br label %for.body | ||
|
||
for.cond.cleanup: ; preds = %for.body, %entry | ||
ret void | ||
|
||
for.body: ; preds = %for.body.lr.ph, %for.body | ||
; %i.028 is the loop index: 0 on entry, %add16 (index + 2) on the back edge. | ||
%i.028 = phi i32 [ 0, %for.body.lr.ph ], [ %add16, %for.body ] | ||
; First element of the pair: doubledst[i] = doublesrc[i]. | ||
%arrayidx = getelementptr inbounds [120 x double], [120 x double]* @doublesrc, i32 0, i32 %i.028 | ||
%0 = load double, double* %arrayidx, align 4 | ||
%arrayidx2 = getelementptr inbounds [120 x double], [120 x double]* @doubledst, i32 0, i32 %i.028 | ||
store double %0, double* %arrayidx2, align 4 | ||
; Second element of the pair: doubledst[i + 1] = doublesrc[i + 1]. | ||
%add4 = add nuw nsw i32 %i.028, 1 | ||
%arrayidx5 = getelementptr inbounds [120 x double], [120 x double]* @doublesrc, i32 0, i32 %add4 | ||
%1 = load double, double* %arrayidx5, align 4 | ||
%arrayidx8 = getelementptr inbounds [120 x double], [120 x double]* @doubledst, i32 0, i32 %add4 | ||
store double %1, double* %arrayidx8, align 4 | ||
; Advance the index by 2 and keep looping while it is still < %width_. | ||
%add16 = add nuw nsw i32 %i.028, 2 | ||
%cmp = icmp slt i32 %add16, %width_ | ||
br i1 %cmp, label %for.body, label %for.cond.cleanup | ||
} | ||
|
40 changes: 40 additions & 0 deletions
40
llvm/test/Analysis/CostModel/X86/interleaved-load-store-i64.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
; Cost-model test: runs the loop vectorizer with its debug output enabled and | ||
; lets FileCheck verify the cost reported for the stride-2 i64 accesses in | ||
; the function below. | ||
; NOTE(review): the assertions requirement is presumably there because the | ||
; -debug-only option is only available in assertion-enabled builds - confirm. | ||
; REQUIRES: asserts | ||
; RUN: opt -S -loop-vectorize -debug-only=loop-vectorize -mcpu=core-avx2 %s 2>&1 | FileCheck %s | ||
; 32-bit x86 target: pointers are 32 bits wide (p:32:32 in the datalayout). | ||
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" | ||
target triple = "i386-unknown-linux-gnu" | ||
|
||
; Source and destination arrays of 120 i64 values, zero-initialized, align 4. | ||
@i64src = common local_unnamed_addr global [120 x i64] zeroinitializer, align 4 | ||
@i64dst = common local_unnamed_addr global [120 x i64] zeroinitializer, align 4 | ||
|
||
; Copies elements of @i64src into @i64dst two adjacent elements per | ||
; iteration (indices i and i + 1). The index starts at 0 and advances by 2; | ||
; the loop repeats while the advanced index is less than %width_ and is | ||
; skipped entirely when %width_ is not greater than 0. | ||
; %k is not referenced anywhere in the body. | ||
; Function Attrs: norecurse nounwind | ||
define void @stride2i64(i64 %k, i32 %width_) { | ||
entry: | ||
|
||
; Expected lines in the loop vectorizer's debug output: at vectorization | ||
; factor 4 both the i64 load and the i64 store are costed at 8. | ||
; CHECK: Found an estimated cost of 8 for VF 4 For instruction: %0 = load i64 | ||
; CHECK: Found an estimated cost of 8 for VF 4 For instruction: store i64 | ||
|
||
; Guard: only enter the loop when %width_ > 0 (signed compare). | ||
%cmp27 = icmp sgt i32 %width_, 0 | ||
br i1 %cmp27, label %for.body.lr.ph, label %for.cond.cleanup | ||
|
||
for.body.lr.ph: ; preds = %entry | ||
br label %for.body | ||
|
||
for.cond.cleanup: ; preds = %for.body, %entry | ||
ret void | ||
|
||
; The block below is reached from %for.body.lr.ph and from its own back edge. | ||
for.body: | ||
; %i.028 is the loop index: 0 on entry, %add16 (index + 2) on the back edge. | ||
%i.028 = phi i32 [ 0, %for.body.lr.ph ], [ %add16, %for.body ] | ||
; First element of the pair: i64dst[i] = i64src[i]. | ||
%arrayidx = getelementptr inbounds [120 x i64], [120 x i64]* @i64src, i32 0, i32 %i.028 | ||
%0 = load i64, i64* %arrayidx, align 4 | ||
%arrayidx2 = getelementptr inbounds [120 x i64], [120 x i64]* @i64dst, i32 0, i32 %i.028 | ||
store i64 %0, i64* %arrayidx2, align 4 | ||
; Second element of the pair: i64dst[i + 1] = i64src[i + 1]. | ||
%add4 = add nuw nsw i32 %i.028, 1 | ||
%arrayidx5 = getelementptr inbounds [120 x i64], [120 x i64]* @i64src, i32 0, i32 %add4 | ||
%1 = load i64, i64* %arrayidx5, align 4 | ||
%arrayidx8 = getelementptr inbounds [120 x i64], [120 x i64]* @i64dst, i32 0, i32 %add4 | ||
store i64 %1, i64* %arrayidx8, align 4 | ||
; Advance the index by 2 and keep looping while it is still < %width_. | ||
%add16 = add nuw nsw i32 %i.028, 2 | ||
%cmp = icmp slt i32 %add16, %width_ | ||
br i1 %cmp, label %for.body, label %for.cond.cleanup | ||
} | ||
|