Skip to content

Commit

Permalink
Enable IPConstantPropagation to work with abstract call sites
Browse files Browse the repository at this point in the history
This modification of the currently unused inter-procedural constant
propagation pass (IPConstantPropagation) shows how abstract call sites
enable optimization of callback calls alongside direct and indirect
calls. Through minimal changes, mostly dealing with the partial mapping
of callbacks, inter-procedural constant propagation was enabled for
callbacks, e.g., OpenMP runtime calls or pthreads_create.

Differential Revision: https://reviews.llvm.org/D56447

llvm-svn: 351628
  • Loading branch information
Johannes Doerfert committed Jan 19, 2019
1 parent 1825184 commit 36872b5
Show file tree
Hide file tree
Showing 5 changed files with 321 additions and 12 deletions.
35 changes: 23 additions & 12 deletions llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
Expand Up @@ -62,32 +62,43 @@ static bool PropagateConstantsIntoArguments(Function &F) {
// Ignore blockaddress uses.
if (isa<BlockAddress>(UR)) continue;

// Used by a non-instruction, or not the callee of a function, do not
// transform.
if (!isa<CallInst>(UR) && !isa<InvokeInst>(UR))
return false;

CallSite CS(cast<Instruction>(UR));
if (!CS.isCallee(&U))
// If no abstract call site was created we did not understand the use, bail.
AbstractCallSite ACS(&U);
if (!ACS)
return false;

// Check out all of the potentially constant arguments. Note that we don't
// inspect varargs here.
CallSite::arg_iterator AI = CS.arg_begin();
Function::arg_iterator Arg = F.arg_begin();
for (unsigned i = 0, e = ArgumentConstants.size(); i != e;
++i, ++AI, ++Arg) {
for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++Arg) {

// If this argument is known non-constant, ignore it.
if (ArgumentConstants[i].second)
continue;

Constant *C = dyn_cast<Constant>(*AI);
Value *V = ACS.getCallArgOperand(i);
Constant *C = dyn_cast_or_null<Constant>(V);

// We can only propagate thread independent values through callbacks.
// This is different to direct/indirect call sites because for them we
// know the thread executing the caller and callee is the same. For
// callbacks this is not guaranteed, thus a thread dependent value could
// be different for the caller and callee, making it invalid to propagate.
if (C && ACS.isCallbackCall() && C->isThreadDependent()) {
// Argument became non-constant. If all arguments are non-constant now,
// give up on this function.
if (++NumNonconstant == ArgumentConstants.size())
return false;

ArgumentConstants[i].second = true;
continue;
}

if (C && ArgumentConstants[i].first == nullptr) {
ArgumentConstants[i].first = C; // First constant seen.
} else if (C && ArgumentConstants[i].first == C) {
// Still the constant value we think it is.
} else if (*AI == &*Arg) {
} else if (V == &*Arg) {
// Ignore recursive calls passing argument down.
} else {
// Argument became non-constant. If all arguments are non-constant now,
Expand Down
87 changes: 87 additions & 0 deletions llvm/test/Transforms/IPConstantProp/multiple_callbacks.ll
@@ -0,0 +1,87 @@
; RUN: opt -ipconstprop -S < %s | FileCheck %s
;
;
; /---------------------------------------|
; | /----------------------|----|
; | | /-----| |
; V V V | |
; void broker(int (*cb0)(int), int (*cb1)(int), int (*cb2)(int), int, int);
;
; static int cb0(int zero) {
; return zero;
; }
; static int cb1(int unknown) {
; return unknown;
; }
; static int cb2(int unknown) {
; cb0(0);
; return unknown;
; }
; static int cb3(int unknown) {
; return unknown;
; }
; static int cb4(int unknown) {
; return unknown;
; }
;
; void foo() {
; cb0(0);
; cb3(1);
; broker(cb0, cb1, cb0, 0, 1);
; broker(cb1, cb2, cb2, 0, 1);
; broker(cb3, cb2, cb3, 0, 1);
; broker(cb4, cb4, cb4, 0, 1);
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define internal i32 @cb0(i32 %zero) {
entry:
; CHECK: @cb0
; CHECK-NEXT: entry
; CHECK-NEXT: ret i32 0
ret i32 %zero
}

define internal i32 @cb1(i32 %unknown) {
entry:
; CHECK: ret i32 %unknown
ret i32 %unknown
}

define internal i32 @cb2(i32 %unknown) {
entry:
%call = call i32 @cb0(i32 0)
; CHECK: ret i32 %unknown
ret i32 %unknown
}

define internal i32 @cb3(i32 %unknown) {
entry:
; CHECK: ret i32 %unknown
ret i32 %unknown
}

define internal i32 @cb4(i32 %unknown) {
entry:
; CHECK: ret i32 %unknown
ret i32 %unknown
}

define void @foo() {
entry:
%call = call i32 @cb0(i32 0)
%call1 = call i32 @cb3(i32 1)
call void @broker(i32 (i32)* nonnull @cb0, i32 (i32)* nonnull @cb1, i32 (i32)* nonnull @cb0, i32 0, i32 1)
call void @broker(i32 (i32)* nonnull @cb1, i32 (i32)* nonnull @cb2, i32 (i32)* nonnull @cb2, i32 0, i32 1)
call void @broker(i32 (i32)* nonnull @cb3, i32 (i32)* nonnull @cb2, i32 (i32)* nonnull @cb3, i32 0, i32 1)
call void @broker(i32 (i32)* nonnull @cb4, i32 (i32)* nonnull @cb4, i32 (i32)* nonnull @cb4, i32 0, i32 1)
ret void
}

declare !callback !3 void @broker(i32 (i32)*, i32 (i32)*, i32 (i32)*, i32, i32)

!0 = !{i64 0, i64 3, i1 false}
!1 = !{i64 1, i64 4, i1 false}
!2 = !{i64 2, i64 3, i1 false}
!3 = !{!0, !2, !1}
120 changes: 120 additions & 0 deletions llvm/test/Transforms/IPConstantProp/openmp_parallel_for.ll
@@ -0,0 +1,120 @@
; RUN: opt -S -ipconstprop < %s | FileCheck %s
;
; void bar(int, float, double);
;
; void foo(int N) {
; float p = 3;
; double q = 5;
; N = 7;
;
; #pragma omp parallel for firstprivate(q)
; for (int i = 2; i < N; i++) {
; bar(i, p, q);
; }
; }
;
; Verify the constant value of q is propagated into the outlined function.
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

%struct.ident_t = type { i32, i32, i32, i32, i8* }

@.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 514, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8

define dso_local void @foo(i32 %N) {
entry:
%N.addr = alloca i32, align 4
%p = alloca float, align 4
store i32 %N, i32* %N.addr, align 4
store float 3.000000e+00, float* %p, align 4
store i32 7, i32* %N.addr, align 4
call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nonnull %N.addr, float* nonnull %p, i64 4617315517961601024)
ret void
}

define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %N, float* dereferenceable(4) %p, i64 %q) {
entry:
%q.addr = alloca i64, align 8
%.omp.lb = alloca i32, align 4
%.omp.ub = alloca i32, align 4
%.omp.stride = alloca i32, align 4
%.omp.is_last = alloca i32, align 4
; CHECK: store i64 4617315517961601024, i64* %q.addr, align 8
store i64 %q, i64* %q.addr, align 8
%conv = bitcast i64* %q.addr to double*
%tmp = load i32, i32* %N, align 4
%sub3 = add nsw i32 %tmp, -3
%cmp = icmp sgt i32 %tmp, 2
br i1 %cmp, label %omp.precond.then, label %omp.precond.end

omp.precond.then: ; preds = %entry
store i32 0, i32* %.omp.lb, align 4
store i32 %sub3, i32* %.omp.ub, align 4
store i32 1, i32* %.omp.stride, align 4
store i32 0, i32* %.omp.is_last, align 4
%tmp5 = load i32, i32* %.global_tid., align 4
call void @__kmpc_for_static_init_4(%struct.ident_t* nonnull @0, i32 %tmp5, i32 34, i32* nonnull %.omp.is_last, i32* nonnull %.omp.lb, i32* nonnull %.omp.ub, i32* nonnull %.omp.stride, i32 1, i32 1)
%tmp6 = load i32, i32* %.omp.ub, align 4
%cmp6 = icmp sgt i32 %tmp6, %sub3
br i1 %cmp6, label %cond.true, label %cond.false

cond.true: ; preds = %omp.precond.then
br label %cond.end

cond.false: ; preds = %omp.precond.then
%tmp7 = load i32, i32* %.omp.ub, align 4
br label %cond.end

cond.end: ; preds = %cond.false, %cond.true
%cond = phi i32 [ %sub3, %cond.true ], [ %tmp7, %cond.false ]
store i32 %cond, i32* %.omp.ub, align 4
%tmp8 = load i32, i32* %.omp.lb, align 4
br label %omp.inner.for.cond

omp.inner.for.cond: ; preds = %omp.inner.for.inc, %cond.end
%.omp.iv.0 = phi i32 [ %tmp8, %cond.end ], [ %add11, %omp.inner.for.inc ]
%tmp9 = load i32, i32* %.omp.ub, align 4
%cmp8 = icmp sgt i32 %.omp.iv.0, %tmp9
br i1 %cmp8, label %omp.inner.for.cond.cleanup, label %omp.inner.for.body

omp.inner.for.cond.cleanup: ; preds = %omp.inner.for.cond
br label %omp.inner.for.end

omp.inner.for.body: ; preds = %omp.inner.for.cond
%add10 = add nsw i32 %.omp.iv.0, 2
%tmp10 = load float, float* %p, align 4
%tmp11 = load double, double* %conv, align 8
call void @bar(i32 %add10, float %tmp10, double %tmp11)
br label %omp.body.continue

omp.body.continue: ; preds = %omp.inner.for.body
br label %omp.inner.for.inc

omp.inner.for.inc: ; preds = %omp.body.continue
%add11 = add nsw i32 %.omp.iv.0, 1
br label %omp.inner.for.cond

omp.inner.for.end: ; preds = %omp.inner.for.cond.cleanup
br label %omp.loop.exit

omp.loop.exit: ; preds = %omp.inner.for.end
%tmp12 = load i32, i32* %.global_tid., align 4
call void @__kmpc_for_static_fini(%struct.ident_t* nonnull @0, i32 %tmp12)
br label %omp.precond.end

omp.precond.end: ; preds = %omp.loop.exit, %entry
ret void
}

declare dso_local void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32)

declare dso_local void @bar(i32, float, double)

declare dso_local void @__kmpc_for_static_fini(%struct.ident_t*, i32)

declare !callback !0 dso_local void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)

!1 = !{i64 2, i64 -1, i64 -1, i1 true}
!0 = !{!1}
49 changes: 49 additions & 0 deletions llvm/test/Transforms/IPConstantProp/pthreads.ll
@@ -0,0 +1,49 @@
; RUN: opt -ipconstprop -S < %s | FileCheck %s
;
; #include <pthread.h>
;
; void *GlobalVPtr;
;
; static void *foo(void *arg) { return arg; }
; static void *bar(void *arg) { return arg; }
;
; int main() {
; pthread_t thread;
; pthread_create(&thread, NULL, foo, NULL);
; pthread_create(&thread, NULL, bar, &GlobalVPtr);
; return 0;
; }
;
; Verify the constant values NULL and &GlobalVPtr are propagated into foo and
; bar, respectively.
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

%union.pthread_attr_t = type { i64, [48 x i8] }

@GlobalVPtr = common dso_local global i8* null, align 8

define dso_local i32 @main() {
entry:
%thread = alloca i64, align 8
%call = call i32 @pthread_create(i64* nonnull %thread, %union.pthread_attr_t* null, i8* (i8*)* nonnull @foo, i8* null)
%call1 = call i32 @pthread_create(i64* nonnull %thread, %union.pthread_attr_t* null, i8* (i8*)* nonnull @bar, i8* bitcast (i8** @GlobalVPtr to i8*))
ret i32 0
}

declare !callback !0 dso_local i32 @pthread_create(i64*, %union.pthread_attr_t*, i8* (i8*)*, i8*)

define internal i8* @foo(i8* %arg) {
entry:
; CHECK: ret i8* null
ret i8* %arg
}

define internal i8* @bar(i8* %arg) {
entry:
; CHECK: ret i8* bitcast (i8** @GlobalVPtr to i8*)
ret i8* %arg
}

!1 = !{i64 2, i64 3, i1 false}
!0 = !{!1}
42 changes: 42 additions & 0 deletions llvm/test/Transforms/IPConstantProp/thread_local_acs.ll
@@ -0,0 +1,42 @@
; RUN: opt -ipconstprop -S < %s | FileCheck %s
;
; #include <threads.h>
; thread_local int gtl = 0;
; int gsh = 0;
;
; static int callee(int *thread_local_ptr, int *shared_ptr) {
; return *thread_local_ptr + *shared_ptr;
; }
;
; void broker(int *, int (*callee)(int *, int *), int *);
;
; void caller() {
; broker(&gtl, callee, &gsh);
; }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

@gtl = dso_local thread_local global i32 0, align 4
@gsh = dso_local global i32 0, align 4

define internal i32 @callee(i32* %thread_local_ptr, i32* %shared_ptr) {
entry:
; CHECK: %tmp = load i32, i32* %thread_local_ptr, align 4
; CHECK: %tmp1 = load i32, i32* @gsh, align 4
; CHECK: %add = add nsw i32 %tmp, %tmp1
%tmp = load i32, i32* %thread_local_ptr, align 4
%tmp1 = load i32, i32* %shared_ptr, align 4
%add = add nsw i32 %tmp, %tmp1
ret i32 %add
}

define dso_local void @caller() {
entry:
call void @broker(i32* nonnull @gtl, i32 (i32*, i32*)* nonnull @callee, i32* nonnull @gsh)
ret void
}

declare !callback !0 dso_local void @broker(i32*, i32 (i32*, i32*)*, i32*)

!1 = !{i64 1, i64 0, i64 2, i1 false}
!0 = !{!1}

0 comments on commit 36872b5

Please sign in to comment.