Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
If an (assumed) invariant location is loaded multiple times, we generated a parameter for each load. However, this caused compile-time problems for several benchmarks (e.g., 445_gobmk in SPEC2006 and BT in the NAS benchmarks). Additionally, the code we generate is suboptimal, as we preload the same location multiple times and perform the same checks on all the parameters that refer to the same value. With this patch we consolidate the invariant loads in three steps: 1) During SCoP initialization, required invariant loads are put in equivalence classes based on their pointer operand. One representative load is used to generate a parameter for the whole class, thus we never generate multiple parameters for the same location. 2) During the SCoP simplification we remove invariant memory accesses that are in the same equivalence class. While doing so we build the union of all execution domains, as it is only important that the location is accessed at least once. 3) During code generation we only preload one element of each equivalence class with the unified execution domain. All others are mapped to that preloaded value. Differential Revision: http://reviews.llvm.org/D13338 llvm-svn: 249853
- Loading branch information
Johannes Doerfert
committed
Oct 9, 2015
1 parent
769e1a9
commit 697fdf8
Showing
12 changed files
with
480 additions
and
53 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
35 changes: 35 additions & 0 deletions
35
polly/test/Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
; RUN: opt %loadPolly -polly-codegen -polly-parallel \ | ||
; RUN: -polly-parallel-force -S < %s | FileCheck %s | ||
; | ||
; Test to verify that we hand down the preloaded A[0] to the OpenMP subfunction. | ||
; | ||
; void f(float *A) { | ||
; for (int i = 1; i < 1000; i++) | ||
; A[i] += A[0] + A[0]; | ||
; } | ||
; | ||
; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds | ||
; CHECK: store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load | ||
; | ||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
|
||
; Test kernel @f: takes a float pointer %A and runs a single loop whose induction variable %indvars.iv starts at 1 and exits once %indvars.iv.next equals 1000. | ||
; Each iteration loads from %A twice, loads the element at index %indvars.iv, adds, and stores the sum back to that element. | ||
; NOTE(review): %tmpadd is never used in this function; %add is computed from %tmp2 and %tmp1 only, which differs from the C sketch "A[i] += A[0] + A[0]" in the file's header comment -- confirm this is intended. | ||
define void @f(float* nocapture %A) { | ||
entry: | ||
br label %for.body | ||
|
||
||
for.cond.cleanup: ; preds = %for.body | ||
ret void | ||
|
||
||
for.body: ; preds = %for.body, %entry | ||
%indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ] | ||
; Two separate loads through the same pointer operand %A. | ||
%tmp = load float, float* %A, align 4 | ||
%tmp2 = load float, float* %A, align 4 | ||
; Sum of the two loads from %A; no later instruction in this function reads it. | ||
%tmpadd = fadd float %tmp, %tmp2 | ||
; Address of the element of %A at index %indvars.iv, which is read, updated, and written back below. | ||
%arrayidx1 = getelementptr inbounds float, float* %A, i64 %indvars.iv | ||
%tmp1 = load float, float* %arrayidx1, align 4 | ||
%add = fadd float %tmp2, %tmp1 | ||
store float %add, float* %arrayidx1, align 4 | ||
; Advance the induction variable and leave the loop when it reaches 1000. | ||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||
%exitcond = icmp eq i64 %indvars.iv.next, 1000 | ||
br i1 %exitcond, label %for.cond.cleanup, label %for.body | ||
} |
Oops, something went wrong.