diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 4da26e2e4973b..54a980523d289 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -231,6 +231,8 @@ struct IRPosition { /// Create a position describing the argument of \p ACS at position \p ArgNo. static const IRPosition callsite_argument(AbstractCallSite ACS, unsigned ArgNo) { + if (ACS.getNumArgOperands() <= ArgNo) + return IRPosition(); int CSArgNo = ACS.getCallArgOperandNo(ArgNo); if (CSArgNo >= 0) return IRPosition::callsite_argument( diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 0c806eb8de153..aaa912b7c02d8 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -4593,21 +4593,24 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl { bool HasValueBefore = SimplifiedAssociatedValue.hasValue(); auto PredForCallSite = [&](AbstractCallSite ACS) { - // Check if we have an associated argument or not (which can happen for - // callback calls). - Value *ArgOp = ACS.getCallArgOperand(getArgNo()); - if (!ArgOp) + const IRPosition &ACSArgPos = + IRPosition::callsite_argument(ACS, getArgNo()); + // Check if a corresponding argument was found or if it is not + // associated (which can happen for callback calls). + if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID) return false; + // We can only propagate thread independent values through callbacks. // This is different to direct/indirect call sites because for them we // know the thread executing the caller and callee is the same. For // callbacks this is not guaranteed, thus a thread dependent value could // be different for the caller and callee, making it invalid to propagate. 
+ Value &ArgOp = ACSArgPos.getAssociatedValue(); if (ACS.isCallbackCall()) - if (auto *C = dyn_cast(ArgOp)) + if (auto *C = dyn_cast(&ArgOp)) if (C->isThreadDependent()) return false; - return checkAndUpdate(A, *this, *ArgOp, SimplifiedAssociatedValue); + return checkAndUpdate(A, *this, ArgOp, SimplifiedAssociatedValue); }; bool AllCallSitesKnown; @@ -7289,13 +7292,23 @@ bool Attributor::checkForAllCallSites( // If we do not require all call sites we might not see all. AllCallSitesKnown = RequireAllCallSites; - for (const Use &U : Fn.uses()) { + SmallVector Uses(make_pointer_range(Fn.uses())); + for (unsigned u = 0; u < Uses.size(); ++u) { + const Use &U = *Uses[u]; LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << *U << " in " << *U.getUser() << "\n"); if (isAssumedDead(U, QueryingAA, nullptr, /* CheckBBLivenessOnly */ true)) { LLVM_DEBUG(dbgs() << "[Attributor] Dead use, skip!\n"); continue; } + if (ConstantExpr *CE = dyn_cast(U.getUser())) { + if (CE->isCast() && CE->getType()->isPointerTy() && + CE->getType()->getPointerElementType()->isFunctionTy()) { + for (const Use &CEU : CE->uses()) + Uses.push_back(&CEU); + continue; + } + } AbstractCallSite ACS(&U); if (!ACS) { diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll b/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll index 0716960b3854c..11cd72a41041f 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll @@ -30,14 +30,14 @@ ; This test is just to verify that we do not crash/assert due to mismatch in ; argument count between the caller and callee. 
-define dso_local void @foo(i16 %a) { +define dso_local i16 @foo(i16 %a) { ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (i16 [[A:%.*]]) ; CHECK-NEXT: [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar to i16 (i16)*)(i16 [[A]]) -; CHECK-NEXT: ret void +; CHECK-NEXT: ret i16 [[CALL]] ; %call = call i16 bitcast (i16 (i16, i16) * @bar to i16 (i16) *)(i16 %a) - ret void + ret i16 %call } define internal i16 @bar(i16 %p1, i16 %p2) { @@ -73,7 +73,7 @@ define internal i16 @vararg_prop(i16 %p1, ...) { define internal i16 @vararg_no_prop(i16 %p1, i16 %p2, ...) { ; CHECK-LABEL: define {{[^@]+}}@vararg_no_prop ; CHECK-SAME: (i16 returned [[P1:%.*]], i16 [[P2:%.*]], ...) -; CHECK-NEXT: ret i16 [[P1]] +; CHECK-NEXT: ret i16 7 ; ret i16 %p1 } diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll b/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll index e7f8705675bbc..a91e8eeee5eec 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll @@ -4,14 +4,14 @@ ; This test is just to verify that we do not crash/assert due to mismatch in ; argument type between the caller and callee. 
-define dso_local void @foo(i16 %a) { +define dso_local i16 @foo(i16 %a) { ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (i16 [[A:%.*]]) ; CHECK-NEXT: [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar to i16 (i16, i32)*)(i16 [[A]], i32 7) -; CHECK-NEXT: ret void +; CHECK-NEXT: ret i16 [[CALL]] ; %call = call i16 bitcast (i16 (i16, i16) * @bar to i16 (i16, i32) *)(i16 %a, i32 7) - ret void + ret i16 %call } define internal i16 @bar(i16 %p1, i16 %p2) { diff --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll index 62df702b68008..fb0984a835a6d 100644 --- a/llvm/test/Transforms/Attributor/callbacks.ll +++ b/llvm/test/Transforms/Attributor/callbacks.ll @@ -170,5 +170,61 @@ declare void @t2_check(i32* nocapture align 256, i64, i32* nocapture) declare !callback !0 void @t2_callback_broker(i32* nocapture , i32* nocapture , void (i32*, i32*, ...)* nocapture, ...) +; Test 3 +; +; Basically test 2 with the casted callback callee used twice. + +define void @t3_caller(i32* noalias %a) { +; CHECK-LABEL: define {{[^@]+}}@t3_caller +; CHECK-SAME: (i32* noalias nocapture align 256 [[A:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 32 +; CHECK-NEXT: [[C:%.*]] = alloca i32*, align 64 +; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 128 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: store i32 42, i32* [[B]], align 32 +; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64 +; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias align 536870912 null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t3_callback_broker(i32* noalias align 536870912 null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; CHECK-NEXT: ret void +; +entry: + %b = alloca i32, align 32 + %c = alloca i32*, align 64 + %ptr = alloca i32, align 128 + %0 = bitcast i32* %b to i8* + store i32 42, i32* %b, align 4 + store i32* %b, i32** %c, align 8 + call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* null, i32* %ptr, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* %a, i64 99, i32** %c) + call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* null, i32* %ptr, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* %a, i64 99, i32** %c) + ret void +} + +; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below! +; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call. +; +; FIXME: We should derive noalias for %a and add a "fake use" of %a in all potentially synchronizing calls. 
+define internal void @t3_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { +; CHECK-LABEL: define {{[^@]+}}@t3_callback_callee +; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 +; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64 +; CHECK-NEXT: tail call void @t3_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]]) +; CHECK-NEXT: ret void +; +entry: + %ptr_val = load i32, i32* %ptr, align 8 + store i32 %ptr_val, i32* %is_not_null + %0 = load i32*, i32** %c, align 8 + tail call void @t3_check(i32* %a, i64 %b, i32* %0) + ret void +} + +declare void @t3_check(i32* nocapture align 256, i64, i32* nocapture) + +declare !callback !0 void @t3_callback_broker(i32* nocapture , i32* nocapture , void (i32*, i32*, ...)* nocapture, ...) + !0 = !{!1} !1 = !{i64 2, i64 -1, i64 -1, i1 true}