-
Notifications
You must be signed in to change notification settings - Fork 10.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Inliner] Propagate callee argument memory access attributes before inlining #89024
Conversation
@llvm/pr-subscribers-llvm-transforms Author: None (goldsteinn) ChangesTo avoid losing information, we can propagate some access attribute We can propagate argument memory access attributes to callsite Full diff: https://github.com/llvm/llvm-project/pull/89024.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 833dcbec228b88..30d1703d6b5a59 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1344,6 +1344,57 @@ static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
++BeginIt, End->getIterator(), InlinerAttributeWindow + 1);
}
+// Add attributes from CB params and Fn attributes that can always be propagated
+// to the corresponding argument / inner callbases.
+static void AddParamAndFnBasicAttributes(const CallBase &CB,
+ ValueToValueMapTy &VMap) {
+ auto *CalledFunction = CB.getCalledFunction();
+ auto &Context = CalledFunction->getContext();
+
+ // Collect valid attributes for all params.
+ SmallVector<AttrBuilder> ValidParamAttrs;
+ bool HasAttrToPropagate = false;
+
+ for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) {
+ ValidParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
+ // Access attributes can be propagated to any param with the same underlying
+ // object as the argument.
+ if (CB.paramHasAttr(I, Attribute::ReadNone))
+ ValidParamAttrs.back().addAttribute(Attribute::ReadNone);
+ if (CB.paramHasAttr(I, Attribute::ReadOnly))
+ ValidParamAttrs.back().addAttribute(Attribute::ReadOnly);
+ if (CB.paramHasAttr(I, Attribute::WriteOnly))
+ ValidParamAttrs.back().addAttribute(Attribute::WriteOnly);
+ HasAttrToPropagate |= ValidParamAttrs.back().hasAttributes();
+ }
+
+ // Won't be able to propagate anything.
+ if (!HasAttrToPropagate)
+ return;
+
+ for (BasicBlock &BB : *CalledFunction) {
+ for (Instruction &Ins : BB) {
+ if (const auto *InnerCB = dyn_cast<CallBase>(&Ins)) {
+ if (auto *NewInnerCB =
+ dyn_cast_or_null<CallBase>(VMap.lookup(InnerCB))) {
+ AttributeList AL = NewInnerCB->getAttributes();
+ for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) {
+ // Check if the underlying value for the parameter is an argument.
+ const Value *UnderlyingV =
+ getUnderlyingObject(InnerCB->getArgOperand(I));
+ if (const Argument *Arg = dyn_cast<Argument>(UnderlyingV)) {
+ unsigned ArgNo = Arg->getArgNo();
+ // If so, propagate its access attributes.
+ AL = AL.addParamAttributes(Context, I, ValidParamAttrs[ArgNo]);
+ }
+ }
+ NewInnerCB->setAttributes(AL);
+ }
+ }
+ }
+ }
+}
+
// Only allow these white listed attributes to be propagated back to the
// callee. This is because other attributes may only be valid on the call
// itself, i.e. attributes such as signext and zeroext.
@@ -2363,6 +2414,10 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// function which feed into its return value.
AddReturnAttributes(CB, VMap);
+ // Clone attributes on the params of the callsite to calls within the
+ // inlined function which use the same param.
+ AddParamAndFnBasicAttributes(CB, VMap);
+
propagateMemProfMetadata(CalledFunc, CB,
InlinedFunctionInfo.ContainsMemProfMetadata, VMap);
diff --git a/llvm/test/Transforms/Inline/access-attributes-prop.ll b/llvm/test/Transforms/Inline/access-attributes-prop.ll
index 3b4a59897c5694..f91f3550f338f5 100644
--- a/llvm/test/Transforms/Inline/access-attributes-prop.ll
+++ b/llvm/test/Transforms/Inline/access-attributes-prop.ll
@@ -168,7 +168,7 @@ define dso_local void @foo2_through_obj(ptr %p, ptr %p2) {
define void @prop_param_func_decl(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_func_decl
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: call void @bar1(ptr [[P]])
+; CHECK-NEXT: call void @bar1(ptr readonly [[P]])
; CHECK-NEXT: ret void
;
call void @foo1_rdonly(ptr %p)
@@ -178,7 +178,7 @@ define void @prop_param_func_decl(ptr %p) {
define void @prop_param_callbase_def(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_callbase_def
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: call void @bar1(ptr [[P]])
+; CHECK-NEXT: call void @bar1(ptr readonly [[P]])
; CHECK-NEXT: call void @bar1(ptr [[P]])
; CHECK-NEXT: ret void
;
@@ -190,7 +190,7 @@ define void @prop_param_callbase_def(ptr %p) {
define void @prop_param_callbase_def_2x(ptr %p, ptr %p2) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_callbase_def_2x
; CHECK-SAME: (ptr [[P:%.*]], ptr [[P2:%.*]]) {
-; CHECK-NEXT: call void @bar2(ptr [[P]], ptr [[P]])
+; CHECK-NEXT: call void @bar2(ptr readonly [[P]], ptr readonly [[P]])
; CHECK-NEXT: ret void
;
call void @foo2(ptr readonly %p, ptr %p)
@@ -202,7 +202,7 @@ define void @prop_param_callbase_def_2x_2(ptr %p, ptr %p2) {
; CHECK-SAME: (ptr [[P:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT: [[PP_I:%.*]] = getelementptr i8, ptr [[P]], i64 9
; CHECK-NEXT: [[P2P_I:%.*]] = getelementptr i8, ptr [[P2]], i64 123
-; CHECK-NEXT: call void @bar2(ptr [[P2P_I]], ptr [[PP_I]])
+; CHECK-NEXT: call void @bar2(ptr writeonly [[P2P_I]], ptr readonly [[PP_I]])
; CHECK-NEXT: ret void
;
call void @foo2_through_obj(ptr readonly %p, ptr writeonly %p2)
@@ -214,7 +214,7 @@ define void @prop_param_callbase_def_2x_incompat(ptr %p, ptr %p2) {
; CHECK-SAME: (ptr [[P:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT: [[PP_I:%.*]] = getelementptr i8, ptr [[P]], i64 9
; CHECK-NEXT: [[P2P_I:%.*]] = getelementptr i8, ptr [[P]], i64 123
-; CHECK-NEXT: call void @bar2(ptr [[P2P_I]], ptr [[PP_I]])
+; CHECK-NEXT: call void @bar2(ptr readonly [[P2P_I]], ptr readnone [[PP_I]])
; CHECK-NEXT: ret void
;
call void @foo2_through_obj(ptr readnone %p, ptr readonly %p)
@@ -224,7 +224,7 @@ define void @prop_param_callbase_def_2x_incompat(ptr %p, ptr %p2) {
define void @prop_param_callbase_def_2x_incompat_2(ptr %p, ptr %p2) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_callbase_def_2x_incompat_2
; CHECK-SAME: (ptr [[P:%.*]], ptr [[P2:%.*]]) {
-; CHECK-NEXT: call void @bar2(ptr [[P]], ptr [[P]])
+; CHECK-NEXT: call void @bar2(ptr readonly [[P]], ptr readonly [[P]])
; CHECK-NEXT: ret void
;
call void @foo2(ptr readonly %p, ptr readnone %p)
@@ -234,7 +234,7 @@ define void @prop_param_callbase_def_2x_incompat_2(ptr %p, ptr %p2) {
define void @prop_param_callbase_def_2x_incompat_3(ptr %p, ptr %p2) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_callbase_def_2x_incompat_3
; CHECK-SAME: (ptr [[P:%.*]], ptr [[P2:%.*]]) {
-; CHECK-NEXT: call void @bar2(ptr [[P]], ptr [[P]])
+; CHECK-NEXT: call void @bar2(ptr readnone [[P]], ptr readnone [[P]])
; CHECK-NEXT: ret void
;
call void @foo2_2(ptr readonly %p, ptr readnone %p)
@@ -244,7 +244,7 @@ define void @prop_param_callbase_def_2x_incompat_3(ptr %p, ptr %p2) {
define void @prop_param_callbase_def_1x_partial(ptr %p, ptr %p2) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_callbase_def_1x_partial
; CHECK-SAME: (ptr [[P:%.*]], ptr [[P2:%.*]]) {
-; CHECK-NEXT: call void @bar2(ptr [[P]], ptr [[P]])
+; CHECK-NEXT: call void @bar2(ptr readonly [[P]], ptr readonly [[P]])
; CHECK-NEXT: ret void
;
call void @foo2(ptr readonly %p, ptr %p)
@@ -264,7 +264,7 @@ define void @prop_param_callbase_def_1x_partial_2(ptr %p, ptr %p2) {
define void @prop_param_callbase_def_1x_partial_3(ptr %p, ptr %p2) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_callbase_def_1x_partial_3
; CHECK-SAME: (ptr [[P:%.*]], ptr [[P2:%.*]]) {
-; CHECK-NEXT: call void @bar2(ptr [[P]], ptr [[P]])
+; CHECK-NEXT: call void @bar2(ptr readonly [[P]], ptr readnone [[P]])
; CHECK-NEXT: ret void
;
call void @foo2_3(ptr readonly %p, ptr %p)
diff --git a/llvm/test/Transforms/Inline/noalias-calls-always.ll b/llvm/test/Transforms/Inline/noalias-calls-always.ll
index 9c851b93278392..a80cd12b26b655 100644
--- a/llvm/test/Transforms/Inline/noalias-calls-always.ll
+++ b/llvm/test/Transforms/Inline/noalias-calls-always.ll
@@ -34,11 +34,11 @@ define void @foo(ptr nocapture %a, ptr nocapture readonly %c, ptr nocapture %b)
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]])
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 512, ptr [[L_I]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A:%.*]], ptr align 16 [[B:%.*]], i64 16, i1 false), !noalias !3
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[B]], ptr align 16 [[C:%.*]], i64 16, i1 false), !noalias !0
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A]], ptr align 16 [[C]], i64 16, i1 false), !alias.scope !5
-; CHECK-NEXT: call void @hey(), !noalias !5
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[L_I]], ptr align 16 [[C]], i64 16, i1 false), !noalias !0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A:%.*]], ptr align 16 [[B:%.*]], i64 16, i1 false), !noalias [[META3]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[B]], ptr readonly align 16 [[C:%.*]], i64 16, i1 false), !noalias [[META0]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A]], ptr readonly align 16 [[C]], i64 16, i1 false), !alias.scope [[META5:![0-9]+]]
+; CHECK-NEXT: call void @hey(), !noalias [[META5]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[L_I]], ptr readonly align 16 [[C]], i64 16, i1 false), !noalias [[META0]]
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 512, ptr [[L_I]])
; CHECK-NEXT: ret void
;
@@ -75,11 +75,11 @@ define void @foo_cs(ptr nocapture %a, ptr nocapture readonly %c, ptr nocapture %
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 512, ptr [[L_I]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A:%.*]], ptr align 16 [[B:%.*]], i64 16, i1 false), !noalias !9
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[B]], ptr align 16 [[C:%.*]], i64 16, i1 false), !noalias !6
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A]], ptr align 16 [[C]], i64 16, i1 false), !alias.scope !11
-; CHECK-NEXT: call void @hey(), !noalias !11
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[L_I]], ptr align 16 [[C]], i64 16, i1 false), !noalias !6
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A:%.*]], ptr align 16 [[B:%.*]], i64 16, i1 false), !noalias [[META9]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[B]], ptr readonly align 16 [[C:%.*]], i64 16, i1 false), !noalias [[META6]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A]], ptr readonly align 16 [[C]], i64 16, i1 false), !alias.scope [[META11:![0-9]+]]
+; CHECK-NEXT: call void @hey(), !noalias [[META11]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[L_I]], ptr readonly align 16 [[C]], i64 16, i1 false), !noalias [[META6]]
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 512, ptr [[L_I]])
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/Inline/noalias-calls.ll b/llvm/test/Transforms/Inline/noalias-calls.ll
index e3791da54b2326..0dd9ec3498a93a 100644
--- a/llvm/test/Transforms/Inline/noalias-calls.ll
+++ b/llvm/test/Transforms/Inline/noalias-calls.ll
@@ -37,11 +37,11 @@ define void @foo(ptr nocapture %a, ptr nocapture readonly %c, ptr nocapture %b)
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]])
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 512, ptr [[L_I]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A]], ptr align 16 [[B]], i64 16, i1 false), !noalias !3
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[B]], ptr align 16 [[C]], i64 16, i1 false), !noalias !0
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A]], ptr align 16 [[C]], i64 16, i1 false), !alias.scope !5
-; CHECK-NEXT: call void @hey(), !noalias !5
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[L_I]], ptr align 16 [[C]], i64 16, i1 false), !noalias !0
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A]], ptr align 16 [[B]], i64 16, i1 false), !noalias [[META3]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[B]], ptr readonly align 16 [[C]], i64 16, i1 false), !noalias [[META0]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A]], ptr readonly align 16 [[C]], i64 16, i1 false), !alias.scope [[META5:![0-9]+]]
+; CHECK-NEXT: call void @hey(), !noalias [[META5]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[L_I]], ptr readonly align 16 [[C]], i64 16, i1 false), !noalias [[META0]]
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 512, ptr [[L_I]])
; CHECK-NEXT: ret void
;
@@ -80,11 +80,11 @@ define void @foo_cs(ptr nocapture %a, ptr nocapture readonly %c, ptr nocapture %
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 512, ptr [[L_I]])
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A]], ptr align 16 [[B]], i64 16, i1 false), !noalias !9
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[B]], ptr align 16 [[C]], i64 16, i1 false), !noalias !6
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A]], ptr align 16 [[C]], i64 16, i1 false), !alias.scope !11
-; CHECK-NEXT: call void @hey(), !noalias !11
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[L_I]], ptr align 16 [[C]], i64 16, i1 false), !noalias !6
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A]], ptr align 16 [[B]], i64 16, i1 false), !noalias [[META9]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[B]], ptr readonly align 16 [[C]], i64 16, i1 false), !noalias [[META6]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A]], ptr readonly align 16 [[C]], i64 16, i1 false), !alias.scope [[META11:![0-9]+]]
+; CHECK-NEXT: call void @hey(), !noalias [[META11]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[L_I]], ptr readonly align 16 [[C]], i64 16, i1 false), !noalias [[META6]]
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 512, ptr [[L_I]])
; CHECK-NEXT: ret void
;
|
There are more cases we can add, this is meant to be a starter / test out any conception issues. |
The change looks correct to me, but I wonder where this is useful and what the benefit/compile-time tradeoff is? Propagating these attributes from the function definition itself to its body could and maybe should happen elsewhere: once per definition instead of each time the inliner runs. And what's the scenario in which the attributes are added on the I assume there's some reason, otherwise you wouldn't have posted this PR, but I'd still like to understand it better because it isn't obvious to me. |
Well we are trying to progagate the callsite attributes which can differ from the function attributes (although access attributes are rare). By having in the inliner we only propagate at the point we may lose the information.
The motivation for me is so that you can put attributes on wrapper functions that will be inlined. |
A bit more clearly there are two cases:
|
da71e7d
to
783fd13
Compare
t
No compile-time cost: https://llvm-compile-time-tracker.com/compare.php?from=9eeae4421198b99eab3ae9a4ff678fda26bbda2a&to=783fd134e75351f1f0b6e4d4a3a9edb0d49c3a27&stat=instructions:u |
FWIW, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is correct.
for (Instruction &Ins : BB) { | ||
if (const auto *InnerCB = dyn_cast<CallBase>(&Ins)) { | ||
if (auto *NewInnerCB = | ||
dyn_cast_or_null<CallBase>(VMap.lookup(InnerCB))) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd suggest to early continue these two, to reduce nesting.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This one doesn't look done?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I forgot to save before committing, fixing...
c3e8c68
to
35e8c53
Compare
// Writable cannot exist in conjunction w/ readonly/readnone | ||
if (AL.hasParamAttr(I, Attribute::ReadOnly) || | ||
AL.hasParamAttr(I, Attribute::ReadNone)) | ||
AL = AL.removeParamAttribute(Context, I, Attribute::Writable); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is missing test coverage?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added test. Its a bit weird in that we have the param attr writable
at definition but readonly
at callsite. Maybe I should handle these conflicts by keeping the declaration params?
// If have readnone, need to clear readonly/writeonly | ||
if (AL.hasParamAttr(I, Attribute::ReadNone)) { | ||
AL = AL.removeParamAttribute(Context, I, Attribute::ReadOnly); | ||
AL = AL.removeParamAttribute(Context, I, Attribute::WriteOnly); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Kind of makes me wonder if we should drop readnone and just encode it as readonly + writeonly instead...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1, think that would make things easier.
35e8c53
to
fb8d67c
Compare
; | ||
call void @foo1_rdonly(ptr readonly %p) | ||
ret void | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hm, I don't think this is testing the right thing. Wouldn't we need the writable to be on the bar1 parameter, rather than the parameter of the outer function?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, you're right. Fixed. Should be propagating readonly
/readnone
now to a bar
w/ writable
.
…nlining To avoid losing information, we can propagate some access attribute from the to-be-inlined callee to its callsites. We can propagate argument memory access attributes to callsite parameters if they are from the same underlying object.
fb8d67c
to
91702df
Compare
Heads-up: this is causing a probable miscompile in an internal test in google. I'm working on a repro.. |
Oh shoot missed this, any repro? |
Sorry, I forgot to reply back. We are more inclined now to think it's a problem in the code and not with the patch. Sorry for the inconvenience. |
I think this patch actually causes a miscompile after all - it propagates memory access attributes to byval params too, which is unsound. Opened #93381. |
Memory restrictions for params to the inlined function do not apply to the copies logically made when that function further passes its own params as byval. In other words, imagine that `@foo()` calls `@bar(ptr readonly %p)` which in turn calls `@baz(ptr byval("...") %p)` (passing the same `%p`). This is fully legal - `baz` is allowed to modify its copy of the object referenced by `%p` because the argument is passed by value. However, when inlining `@bar` into `@foo`, we can't say that the callsite is now `@baz(ptr readonly byval("...") %p)`, as this would mean that `@baz` is not allowed to modify it's copy of the object pointed to by `%p`. LangRef says: "The copy is considered to belong to the caller not the callee (for example, readonly functions should not write to byval parameters)". This fixes a miscompile introduced by PR #89024 in a program in the Google codebase.
To avoid losing information, we can propagate some access attribute
from the to-be-inlined callee to its callsites.
We can propagate argument memory access attributes to callsite
parameters if they are from the same underlying object.