Skip to content

Commit

Permalink
[Polly] [PPCGCodeGeneration] Skip Scops which contain function pointers.
Browse files Browse the repository at this point in the history
In `PPCGCodeGeneration`, we try to take the references of every `Value`
that is used within a Scop to offload to the kernel. This occurs in
`GPUNodeBuilder::createLaunchParameters`.

This breaks if one of the values is a function pointer, since one of
these cases will trigger:

1. We try to take the references of an intrinsic function, and this
breaks at `verifyModule`, since it is illegal to take the reference of
an intrinsic.

2. We manage to take the reference to a function, but this fails at
`verifyModule` since the function will not be present in the module that
is created in the kernel.

3. Even if `verifyModule` succeeds (which should not occur), we would
then try to call a *host function* from the *device*, which is
illegal runtime behaviour.

So, we disable this entire range of possibilities by simply not allowing
function references within a `Scop` which corresponds to a kernel.

However, note that this is too conservative. We *can* allow intrinsics
within kernels if the backend can lower the intrinsic correctly. For
example, an intrinsic like `llvm.powi.*` can actually be lowered by the `NVPTX`
backend.

We will now gradually whitelist intrinsics which are known to be safe.

Differential Revision: https://reviews.llvm.org/D33414

llvm-svn: 305185
  • Loading branch information
bollu committed Jun 12, 2017
1 parent b079c8b commit bccaea5
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 0 deletions.
38 changes: 38 additions & 0 deletions polly/lib/CodeGen/PPCGCodeGeneration.cpp
Expand Up @@ -2611,6 +2611,36 @@ class PPCGCodeGeneration : public ScopPass {
return isl_ast_expr_ge(Iterations, MinComputeExpr);
}

/// Scan every operand of every instruction in @p BB and report whether any
/// of them has a pointer-to-function type.
///
/// @param BB The basic block whose instructions are inspected.
///
/// @returns True as soon as one operand of pointer-to-function type is found,
///          false if the block contains none.
bool ContainsFnPtrValInBlock(const BasicBlock *BB) {
  for (const Instruction &I : *BB) {
    for (Value *Operand : I.operands()) {
      // Only pointer-typed operands can refer to a function; everything else
      // fails the dyn_cast and is ignored.
      PointerType *PtrTy = dyn_cast<PointerType>(Operand->getType());
      if (PtrTy && isa<FunctionType>(PtrTy->getElementType()))
        return true;
    }
  }
  return false;
}

/// Report whether any basic block belonging to a statement of @p S uses an
/// operand of pointer-to-function type.
///
/// Block statements contribute their single basic block; region statements
/// contribute every block of their region.
///
/// @param S The Scop whose statements are inspected.
///
/// @returns True if ContainsFnPtrValInBlock holds for at least one block.
bool ContainsFnPtr(const Scop &S) {
  for (auto &Statement : S) {
    if (Statement.isBlockStmt()) {
      if (ContainsFnPtrValInBlock(Statement.getBasicBlock()))
        return true;
      continue;
    }

    // Anything that is not a block statement has to be a region statement.
    assert(Statement.isRegionStmt() &&
           "Stmt was neither block nor region statement");
    for (const BasicBlock *RegionBB : Statement.getRegion()->blocks()) {
      if (ContainsFnPtrValInBlock(RegionBB))
        return true;
    }
  }
  return false;
}

/// Generate code for a given GPU AST described by @p Root.
///
/// @param Root An isl_ast_node pointing to the root of the GPU AST.
Expand Down Expand Up @@ -2681,6 +2711,14 @@ class PPCGCodeGeneration : public ScopPass {
if (S->hasInvariantAccesses())
return false;

// We currently do not support functions inside kernels, as code
// generation will need to offload function calls to the kernel.
// This may lead to a kernel trying to call a function on the host.
// This also allows us to prevent codegen from trying to take the
// address of an intrinsic function to send to the kernel.
if (ContainsFnPtr(CurrentScop))
return false;

auto PPCGScop = createPPCGScop();
auto PPCGProg = createPPCGProg(PPCGScop);
auto PPCGGen = generateGPU(PPCGScop, PPCGProg);
Expand Down
82 changes: 82 additions & 0 deletions polly/test/GPGPU/unknown-fn-call-not-copied-into-kernel.ll
@@ -0,0 +1,82 @@
; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s --check-prefix=SCOP
; RUN: opt %loadPolly -S -polly-codegen-ppcg < %s | FileCheck %s

; Check that we do not create a kernel if there is an
; unknown function call in a candidate kernel.

; Check that we model the kernel as a scop.
; SCOP: Function: f
; SCOP-NEXT: Region: %entry.split---%for.end13

; If a kernel were generated, then this code would have been part of the kernel
; and not the `.ll` file that is generated.
; CHECK: %conv = fpext float %0 to double
; CHECK-NEXT: %1 = tail call double @extern.fn(double %conv)
; CHECK-NEXT: %conv6 = fptrunc double %1 to float

; REQUIRES: pollyacc

; static const int N = 1000;
; void f(float A[N][N], int n, float B[N][N]) {
; for(int i = 0; i < n; i++) {
; for(int j = 0; j < n; j++) {
; B[i][j] = extern_fn(A[i][j]);
; }
;
; }
; }

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"

; Host function containing a two-deep loop nest, both loops bounded by %n.
; Each inner iteration reads one element of %A, passes it through @extern.fn
; and writes the result to the same position in %B.
define void @f([1000 x float]* %A, i32 %n, [1000 x float]* %B) {
entry:
br label %entry.split

; Guard: the loop nest is bypassed entirely when %n is not greater than 0.
entry.split: ; preds = %entry
%cmp3 = icmp sgt i32 %n, 0
br i1 %cmp3, label %for.cond1.preheader.lr.ph, label %for.end13

for.cond1.preheader.lr.ph: ; preds = %entry.split
br label %for.cond1.preheader

; Outer loop header: %indvars.iv5 is the outer induction variable, starting
; at 0. The inner loop is entered only when %n is greater than 0.
for.cond1.preheader: ; preds = %for.cond1.preheader.lr.ph, %for.inc11
%indvars.iv5 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next6, %for.inc11 ]
%cmp21 = icmp sgt i32 %n, 0
br i1 %cmp21, label %for.body3.lr.ph, label %for.inc11

for.body3.lr.ph: ; preds = %for.cond1.preheader
br label %for.body3

; Inner loop body: load the float at A[%indvars.iv5][%indvars.iv], extend it
; to double, call @extern.fn, truncate the result back to float and store it
; at B[%indvars.iv5][%indvars.iv]. The loop repeats until the incremented
; inner induction variable equals the zero-extended %n.
for.body3: ; preds = %for.body3.lr.ph, %for.body3
%indvars.iv = phi i64 [ 0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
%arrayidx5 = getelementptr inbounds [1000 x float], [1000 x float]* %A, i64 %indvars.iv5, i64 %indvars.iv
%0 = load float, float* %arrayidx5, align 4
%conv = fpext float %0 to double
%1 = tail call double @extern.fn(double %conv)
%conv6 = fptrunc double %1 to float
%arrayidx10 = getelementptr inbounds [1000 x float], [1000 x float]* %B, i64 %indvars.iv5, i64 %indvars.iv
store float %conv6, float* %arrayidx10, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%wide.trip.count = zext i32 %n to i64
%exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond, label %for.body3, label %for.cond1.for.inc11_crit_edge

for.cond1.for.inc11_crit_edge: ; preds = %for.body3
br label %for.inc11

; Outer loop latch: increment the outer induction variable and branch back to
; the header until it equals the zero-extended %n.
for.inc11: ; preds = %for.cond1.for.inc11_crit_edge, %for.cond1.preheader
%indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
%wide.trip.count7 = zext i32 %n to i64
%exitcond8 = icmp ne i64 %indvars.iv.next6, %wide.trip.count7
br i1 %exitcond8, label %for.cond1.preheader, label %for.cond.for.end13_crit_edge

for.cond.for.end13_crit_edge: ; preds = %for.inc11
br label %for.end13

for.end13: ; preds = %for.cond.for.end13_crit_edge, %entry.split
ret void
}

; Function that is only declared, not defined, in this module. The call to it
; inside @f is the function reference that must keep the loop nest on the
; host instead of being moved into a GPU kernel.
declare double @extern.fn(double) #0
; Attribute group applied to @extern.fn above.
attributes #0 = { readnone }

0 comments on commit bccaea5

Please sign in to comment.