Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Polly] [PPCGCodeGeneration] Skip Scops which contain function pointers.
In `PPCGCodeGeneration`, we try to take the references of every `Value` that is used within a Scop to offload to the kernel. This occurs in `GPUNodeBuilder::createLaunchParameters`. This breaks if one of the values is a function pointer, since one of these cases will trigger: 1. We try to to take the references of an intrinsic function, and this breaks at `verifyModule`, since it is illegal to take the reference of an intrinsic. 2. We manage to take the reference to a function, but this fails at `verifyModule` since the function will not be present in the module that is created in the kernel. 3. Even if `verifyModule` succeeds (which should not occur), we would then try to call a *host function* from the *device*, which is illegal runtime behaviour. So, we disable this entire range of possibilities by simply not allowing function references within a `Scop` which corresponds to a kernel. However, note that this is too conservative. We *can* allow intrinsics within kernels if the backend can lower the intrinsic correctly. For example, an intrinsic like `llvm.powi.*` can actually be lowered by the `NVPTX` backend. We will now gradually whitelist intrinsics which are known to be safe. Differential Revision: https://reviews.llvm.org/D33414 llvm-svn: 305185
- Loading branch information
Showing
2 changed files
with
120 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
82 changes: 82 additions & 0 deletions
82
polly/test/GPGPU/unknown-fn-call-not-copied-into-kernel.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s --check-prefix=SCOP | ||
; RUN: opt %loadPolly -S -polly-codegen-ppcg < %s | FileCheck %s | ||
|
||
; Check that we do not create a kernel if there is an | ||
; unknown function call in a candidate kernel. | ||
|
||
; Check that we model the kernel as a scop. | ||
; SCOP: Function: f | ||
; SCOP-NEXT: Region: %entry.split---%for.end13 | ||
|
||
; If a kernel were generated, then this code would have been part of the kernel | ||
; and not the `.ll` file that is generated. | ||
; CHECK: %conv = fpext float %0 to double | ||
; CHECK-NEXT: %1 = tail call double @extern.fn(double %conv) | ||
; CHECK-NEXT: %conv6 = fptrunc double %1 to float | ||
|
||
; REQUIRES: pollyacc | ||
|
||
; static const int N = 1000; | ||
; void f(float A[N][N], int n, float B[N][N]) { | ||
; for(int i = 0; i < n; i++) { | ||
; for(int j = 0; j < n; j++) { | ||
; B[i][j] = extern_fn(A[i][j], 3); | ||
; } | ||
; | ||
; } | ||
; } | ||
|
||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-apple-macosx10.11.0" | ||
|
||
define void @f([1000 x float]* %A, i32 %n, [1000 x float]* %B) { | ||
entry: | ||
br label %entry.split | ||
|
||
entry.split: ; preds = %entry | ||
%cmp3 = icmp sgt i32 %n, 0 | ||
br i1 %cmp3, label %for.cond1.preheader.lr.ph, label %for.end13 | ||
|
||
for.cond1.preheader.lr.ph: ; preds = %entry.split | ||
br label %for.cond1.preheader | ||
|
||
for.cond1.preheader: ; preds = %for.cond1.preheader.lr.ph, %for.inc11 | ||
%indvars.iv5 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next6, %for.inc11 ] | ||
%cmp21 = icmp sgt i32 %n, 0 | ||
br i1 %cmp21, label %for.body3.lr.ph, label %for.inc11 | ||
|
||
for.body3.lr.ph: ; preds = %for.cond1.preheader | ||
br label %for.body3 | ||
|
||
for.body3: ; preds = %for.body3.lr.ph, %for.body3 | ||
%indvars.iv = phi i64 [ 0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ] | ||
%arrayidx5 = getelementptr inbounds [1000 x float], [1000 x float]* %A, i64 %indvars.iv5, i64 %indvars.iv | ||
%0 = load float, float* %arrayidx5, align 4 | ||
%conv = fpext float %0 to double | ||
%1 = tail call double @extern.fn(double %conv) | ||
%conv6 = fptrunc double %1 to float | ||
%arrayidx10 = getelementptr inbounds [1000 x float], [1000 x float]* %B, i64 %indvars.iv5, i64 %indvars.iv | ||
store float %conv6, float* %arrayidx10, align 4 | ||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||
%wide.trip.count = zext i32 %n to i64 | ||
%exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count | ||
br i1 %exitcond, label %for.body3, label %for.cond1.for.inc11_crit_edge | ||
|
||
for.cond1.for.inc11_crit_edge: ; preds = %for.body3 | ||
br label %for.inc11 | ||
|
||
for.inc11: ; preds = %for.cond1.for.inc11_crit_edge, %for.cond1.preheader | ||
%indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1 | ||
%wide.trip.count7 = zext i32 %n to i64 | ||
%exitcond8 = icmp ne i64 %indvars.iv.next6, %wide.trip.count7 | ||
br i1 %exitcond8, label %for.cond1.preheader, label %for.cond.for.end13_crit_edge | ||
|
||
for.cond.for.end13_crit_edge: ; preds = %for.inc11 | ||
br label %for.end13 | ||
|
||
for.end13: ; preds = %for.cond.for.end13_crit_edge, %entry.split | ||
ret void | ||
} | ||
|
||
declare double @extern.fn(double) #0 | ||
attributes #0 = { readnone } |