Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AMDGPU] Add attribute for target loop unroll threshold default
Summary: Add a function attribute to allow the target-specific default loop unroll threshold to be specified on a per-function basis. This allows a front-end to give guidance where it has insight that is not available to the back-end, while still allowing the target-specific heuristics to also have an effect. Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits. Tags: #llvm. Differential Revision: https://reviews.llvm.org/D68873
- Loading branch information
Tim Corringham
committed
Nov 21, 2019
1 parent
3889ff8
commit 6821a3c
Showing
2 changed files
with
54 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
; RUN: opt < %s -S -mtriple=amdgcn-- -basicaa -loop-unroll | FileCheck %s | ||
|
||
; Check that the loop in unroll_default is not fully unrolled using the default | ||
; unroll threshold | ||
; CHECK-LABEL: @unroll_default | ||
; CHECK: entry: | ||
; CHECK: br i1 %cmp | ||
; CHECK: ret void | ||
|
||
; Check that the same loop in unroll_full is fully unrolled when the default | ||
; unroll threshold is increased by use of the amdgpu-unroll-threshold attribute | ||
; CHECK-LABEL: @unroll_full | ||
; CHECK: entry: | ||
; CHECK-NOT: br i1 %cmp | ||
; CHECK: ret void | ||
|
||
; Two internal, null-initialised i32* globals. The loops in @unroll_default and | ||
; @unroll_full load the 64-bit contents of @in and store them to @out, which | ||
; gives each loop body a load and a store to be counted by the unroller. | ||
@in = internal unnamed_addr global i32* null, align 8 | ||
@out = internal unnamed_addr global i32* null, align 8 | ||
|
||
; @unroll_default: baseline case. The function carries no attribute group, so | ||
; no per-function unroll threshold is requested. The loop body copies the i64 | ||
; stored in @in to @out and runs 100 times (%i.0 counts 0..99; the loop exits | ||
; once %inc reaches 100). The FileCheck directives for this function expect | ||
; the conditional back-edge branch on %cmp to still be present after | ||
; -loop-unroll, i.e. the loop is not fully unrolled. | ||
; NOTE: the names "entry" and "%cmp" are matched textually by the FileCheck | ||
; directives at the top of the file; do not rename them. | ||
define void @unroll_default() { | ||
entry: | ||
br label %do.body | ||
|
||
do.body: ; preds = %do.body, %entry | ||
; Induction variable: 0 on entry, %inc on the back-edge. | ||
%i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ] | ||
; Copy the pointer-sized contents of @in to @out through i64 bitcasts. | ||
%v1 = load i64, i64* bitcast (i32** @in to i64*), align 8 | ||
store i64 %v1, i64* bitcast (i32** @out to i64*), align 8 | ||
%inc = add nsw i32 %i.0, 1 | ||
; Continue while the incremented counter is below 100 (signed compare). | ||
%cmp = icmp slt i32 %inc, 100 | ||
br i1 %cmp, label %do.body, label %do.end | ||
|
||
do.end: ; preds = %do.body | ||
ret void | ||
} | ||
|
||
; @unroll_full: same 100-iteration copy loop as @unroll_default, but the | ||
; function references attribute group #0, which sets | ||
; "amdgpu-unroll-threshold"="1000". The FileCheck directives for this function | ||
; expect no conditional branch on %cmp to remain between "entry:" and | ||
; "ret void" after -loop-unroll, i.e. the loop is fully unrolled. | ||
; NOTE: the names "entry" and "%cmp" are matched textually by the FileCheck | ||
; directives at the top of the file; do not rename them. | ||
define void @unroll_full() #0 { | ||
entry: | ||
br label %do.body | ||
|
||
do.body: ; preds = %do.body, %entry | ||
; Induction variable: 0 on entry, %inc on the back-edge. | ||
%i.0 = phi i32 [ 0, %entry ], [ %inc, %do.body ] | ||
; Copy the pointer-sized contents of @in to @out through i64 bitcasts. | ||
%v1 = load i64, i64* bitcast (i32** @in to i64*), align 8 | ||
store i64 %v1, i64* bitcast (i32** @out to i64*), align 8 | ||
%inc = add nsw i32 %i.0, 1 | ||
; Continue while the incremented counter is below 100 (signed compare). | ||
%cmp = icmp slt i32 %inc, 100 | ||
br i1 %cmp, label %do.body, label %do.end | ||
|
||
do.end: ; preds = %do.body | ||
ret void | ||
} | ||
|
||
; Attribute group #0 (used only by @unroll_full): sets the string function | ||
; attribute "amdgpu-unroll-threshold" to "1000". Per the test description | ||
; above, this increases the default unroll threshold for that function. | ||
attributes #0 = { "amdgpu-unroll-threshold"="1000" } |