-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Inliner] Add argument/function attribute propagation before inlining. #68164
[Inliner] Add argument/function attribute propagation before inlining. #68164
Conversation
goldsteinn
commented
Oct 3, 2023
- [Inliner] Propagate callee function memory access attributes before inlining
- [Inliner] Propagate callee argument memory access attributes before inlining
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-clang Changes
Patch is 24.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/68164.diff 8 Files Affected:
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
index 44f8cbe2cc01739..642b08ac68ef122 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
@@ -636,31 +636,31 @@ void test_core(void) {
// CHECK-ASM: vlbb
vsc = vec_load_len(cptrsc, idx);
- // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr %{{.*}})
+ // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr readonly %{{.*}})
// CHECK-ASM: vll
vuc = vec_load_len(cptruc, idx);
- // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr %{{.*}})
+ // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr readonly %{{.*}})
// CHECK-ASM: vll
vss = vec_load_len(cptrss, idx);
- // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr %{{.*}})
+ // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr readonly %{{.*}})
// CHECK-ASM: vll
vus = vec_load_len(cptrus, idx);
- // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr %{{.*}})
+ // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr readonly %{{.*}})
// CHECK-ASM: vll
vsi = vec_load_len(cptrsi, idx);
- // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr %{{.*}})
+ // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr readonly %{{.*}})
// CHECK-ASM: vll
vui = vec_load_len(cptrui, idx);
- // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr %{{.*}})
+ // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr readonly %{{.*}})
// CHECK-ASM: vll
vsl = vec_load_len(cptrsl, idx);
- // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr %{{.*}})
+ // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr readonly %{{.*}})
// CHECK-ASM: vll
vul = vec_load_len(cptrul, idx);
- // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr %{{.*}})
+ // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr readonly %{{.*}})
// CHECK-ASM: vll
vd = vec_load_len(cptrd, idx);
- // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr %{{.*}})
+ // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr readonly %{{.*}})
// CHECK-ASM: vll
vec_store_len(vsc, ptrsc, idx);
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c
index 416ca0ddd1b4fe2..3f02565dfb488ce 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c
@@ -207,10 +207,10 @@ void test_core(void) {
// CHECK-ASM: vlbb
vf = vec_load_len(cptrf, idx);
- // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr %{{.*}})
+ // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr readonly %{{.*}})
// CHECK-ASM: vll
vd = vec_load_len(cptrd, idx);
- // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr %{{.*}})
+ // CHECK: call <16 x i8> @llvm.s390.vll(i32 %{{.*}}, ptr readonly %{{.*}})
// CHECK-ASM: vll
vec_store_len(vf, ptrf, idx);
@@ -221,10 +221,10 @@ void test_core(void) {
// CHECK-ASM: vstl
vuc = vec_load_len_r(cptruc, 0);
- // CHECK: call <16 x i8> @llvm.s390.vlrl(i32 0, ptr %{{.*}})
+ // CHECK: call <16 x i8> @llvm.s390.vlrl(i32 0, ptr readonly %{{.*}})
// CHECK-ASM: vlrl %{{.*}}, 0(%{{.*}}), 0
vuc = vec_load_len_r(cptruc, idx);
- // CHECK: call <16 x i8> @llvm.s390.vlrl(i32 %{{.*}}, ptr %{{.*}})
+ // CHECK: call <16 x i8> @llvm.s390.vlrl(i32 %{{.*}}, ptr readonly %{{.*}})
// CHECK-ASM: vlrlr
vec_store_len_r(vuc, ptruc, 0);
diff --git a/llvm/include/llvm/Support/ModRef.h b/llvm/include/llvm/Support/ModRef.h
index 7687280111a1f86..dd8e8f36cca203d 100644
--- a/llvm/include/llvm/Support/ModRef.h
+++ b/llvm/include/llvm/Support/ModRef.h
@@ -180,6 +180,13 @@ template <typename LocationEnum> class MemoryEffectsBase {
return ME;
}
+ /// Get new MemoryEffectsBase with ModRef on the given Loc.
+ MemoryEffectsBase getWithLocUnknown(Location Loc) const {
+ MemoryEffectsBase ME = *this;
+ ME.setModRef(Loc, ModRefInfo::ModRef);
+ return ME;
+ }
+
/// Get ModRefInfo for any location.
ModRefInfo getModRef() const {
ModRefInfo MR = ModRefInfo::NoModRef;
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 6d5312c5a081ce9..02b80bfc8e172b5 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -32,6 +32,7 @@
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
@@ -61,6 +62,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ModRef.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -1374,6 +1376,130 @@ static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) {
return Valid;
}
+// Add attributes from CB params and Fn attributes that can always be propagated
+// to the corresponding argument / inner callbases.
+static void AddParamAndFnBasicAttributes(const CallBase &CB,
+ ValueToValueMapTy &VMap) {
+ auto *CalledFunction = CB.getCalledFunction();
+ auto &Context = CalledFunction->getContext();
+
+ // Collect valid attributes for all params.
+ SmallVector<AttrBuilder> ValidParamAttrs;
+ bool HasAttrToPropagate = false;
+
+ for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) {
+ ValidParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
+ // Access attributes can be propagated to any param with the same underlying
+ // object as the argument.
+ if (CB.paramHasAttr(I, Attribute::ReadNone))
+ ValidParamAttrs.back().addAttribute(Attribute::ReadNone);
+ if (CB.paramHasAttr(I, Attribute::ReadOnly))
+ ValidParamAttrs.back().addAttribute(Attribute::ReadOnly);
+ if (CB.paramHasAttr(I, Attribute::WriteOnly))
+ ValidParamAttrs.back().addAttribute(Attribute::WriteOnly);
+ HasAttrToPropagate |= ValidParamAttrs.back().hasAttributes();
+ }
+
+ // Won't be able to propagate anything.
+ if (!HasAttrToPropagate)
+ return;
+
+ for (BasicBlock &BB : *CalledFunction) {
+ for (Instruction &Ins : BB) {
+ CallBase *InnerCB = dyn_cast<CallBase>(&Ins);
+ if (InnerCB != nullptr) {
+ if (auto *NewInnerCB =
+ dyn_cast_or_null<CallBase>(VMap.lookup(InnerCB))) {
+ AttributeList AL = NewInnerCB->getAttributes();
+ for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) {
+ // Check if the underlying value for the parameter is an argument.
+ const Value *UnderlyingV =
+ getUnderlyingObject(InnerCB->getArgOperand(I));
+ if (const Argument *Arg = dyn_cast<Argument>(UnderlyingV)) {
+ unsigned ArgNo = Arg->getArgNo();
+ // If so, propagate its access attributes.
+ AL = AL.addParamAttributes(Context, I,
+ ValidParamAttrs[ArgNo]);
+ }
+ }
+ NewInnerCB->setAttributes(AL);
+ }
+ }
+ }
+ }
+}
+
+// Recursively check BB for a preceding alloca. An alive alloca at the callsite
+// essentially makes propagating any memory effects impossible. While scanning
+// for the alloca, also collect any callsites we may be able to modify.
+static const std::pair<bool, SmallVector<CallBase *, 4>> &
+GetBBAllocaAndCallsiteInfo(
+ BasicBlock *BB,
+ DenseMap<BasicBlock *, std::pair<bool, SmallVector<CallBase *, 4>>>
+ *FirstAllocaAndCBs,
+ MemoryEffects ME) {
+ auto InsertRes = FirstAllocaAndCBs->insert({BB, {false, {}}});
+ if (!InsertRes.second)
+ return InsertRes.first->second;
+
+ for (BasicBlock *PBB : predecessors(BB)) {
+ auto PBBInfo = GetBBAllocaAndCallsiteInfo(PBB, FirstAllocaAndCBs, ME);
+ if (PBBInfo.first) {
+ auto BBInfo = FirstAllocaAndCBs->find(BB);
+ assert(BBInfo != FirstAllocaAndCBs->end());
+ BBInfo->second.first = true;
+ // We have an alloca in a preceding BB, we can't propagate any memory
+ // effects.
+ return BBInfo->second;
+ }
+ }
+
+ auto BBInfo = FirstAllocaAndCBs->find(BB);
+ assert(BBInfo != FirstAllocaAndCBs->end());
+ for (auto &Ins : *BB) {
+ if (isa<AllocaInst>(&Ins)) {
+ BBInfo->second.first = true;
+ // Dominating alloca in the BB, we can propagate to any callsites prior to
+ // the alloca but none after.
+ return BBInfo->second;
+ }
+ // Add callsite.
+ if (auto *OtherCB = dyn_cast<CallBase>(&Ins))
+ BBInfo->second.second.push_back(OtherCB);
+ }
+ return BBInfo->second;
+}
+
+// Propagate memory effects from the to-be-inlined function to any callsites in
+// the function.
+static void AddFnAccessAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
+ auto *CalledFunction = CB.getCalledFunction();
+ MemoryEffects ME = CB.getMemoryEffects();
+ if (ME == MemoryEffects::unknown())
+ return;
+ DenseMap<BasicBlock *, std::pair<bool, SmallVector<CallBase *, 4>>>
+ FirstAllocaAndCBs;
+
+ for (BasicBlock &BB : *CalledFunction) {
+ auto BBInfo = GetBBAllocaAndCallsiteInfo(&BB, &FirstAllocaAndCBs, ME);
+ // We found no callsites that we can propagate memory effects to.
+ if (BBInfo.second.empty())
+ continue;
+ for (CallBase *OtherCB : BBInfo.second) {
+ assert(OtherCB->getParent() == &BB);
+ if (auto *NewOtherCB = dyn_cast_or_null<CallBase>(VMap.lookup(OtherCB))) {
+ MemoryEffects NewME = NewOtherCB->getMemoryEffects();
+ // ArgMem memory effects don't directly apply.
+ NewME &= ME.getWithLocUnknown(IRMemLocation::ArgMem);
+ // If we have complete coverage of some ModRef then we can apply to
+ // ArgMem as well.
+ NewME &= MemoryEffects(ME.getModRef());
+ NewOtherCB->setMemoryEffects(NewME);
+ }
+ }
+ }
+}
+
static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB);
AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB);
@@ -2339,6 +2465,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// Add noalias metadata if necessary.
AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo);
+ AddParamAndFnBasicAttributes(CB, VMap);
+ AddFnAccessAttributes(CB, VMap);
+
// Clone return attributes on the callsite into the calls within the inlined
// function which feed into its return value.
AddReturnAttributes(CB, VMap);
diff --git a/llvm/test/Transforms/Inline/access-attributes-prop.ll b/llvm/test/Transforms/Inline/access-attributes-prop.ll
index 3b4a59897c5694a..82688999abc1910 100644
--- a/llvm/test/Transforms/Inline/access-attributes-prop.ll
+++ b/llvm/test/Transforms/Inline/access-attributes-prop.ll
@@ -168,7 +168,7 @@ define dso_local void @foo2_through_obj(ptr %p, ptr %p2) {
define void @prop_param_func_decl(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_func_decl
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: call void @bar1(ptr [[P]])
+; CHECK-NEXT: call void @bar1(ptr readonly [[P]])
; CHECK-NEXT: ret void
;
call void @foo1_rdonly(ptr %p)
@@ -178,7 +178,7 @@ define void @prop_param_func_decl(ptr %p) {
define void @prop_param_callbase_def(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_callbase_def
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: call void @bar1(ptr [[P]])
+; CHECK-NEXT: call void @bar1(ptr readonly [[P]])
; CHECK-NEXT: call void @bar1(ptr [[P]])
; CHECK-NEXT: ret void
;
@@ -190,7 +190,7 @@ define void @prop_param_callbase_def(ptr %p) {
define void @prop_param_callbase_def_2x(ptr %p, ptr %p2) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_callbase_def_2x
; CHECK-SAME: (ptr [[P:%.*]], ptr [[P2:%.*]]) {
-; CHECK-NEXT: call void @bar2(ptr [[P]], ptr [[P]])
+; CHECK-NEXT: call void @bar2(ptr readonly [[P]], ptr readonly [[P]])
; CHECK-NEXT: ret void
;
call void @foo2(ptr readonly %p, ptr %p)
@@ -202,7 +202,7 @@ define void @prop_param_callbase_def_2x_2(ptr %p, ptr %p2) {
; CHECK-SAME: (ptr [[P:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT: [[PP_I:%.*]] = getelementptr i8, ptr [[P]], i64 9
; CHECK-NEXT: [[P2P_I:%.*]] = getelementptr i8, ptr [[P2]], i64 123
-; CHECK-NEXT: call void @bar2(ptr [[P2P_I]], ptr [[PP_I]])
+; CHECK-NEXT: call void @bar2(ptr writeonly [[P2P_I]], ptr readonly [[PP_I]])
; CHECK-NEXT: ret void
;
call void @foo2_through_obj(ptr readonly %p, ptr writeonly %p2)
@@ -214,7 +214,7 @@ define void @prop_param_callbase_def_2x_incompat(ptr %p, ptr %p2) {
; CHECK-SAME: (ptr [[P:%.*]], ptr [[P2:%.*]]) {
; CHECK-NEXT: [[PP_I:%.*]] = getelementptr i8, ptr [[P]], i64 9
; CHECK-NEXT: [[P2P_I:%.*]] = getelementptr i8, ptr [[P]], i64 123
-; CHECK-NEXT: call void @bar2(ptr [[P2P_I]], ptr [[PP_I]])
+; CHECK-NEXT: call void @bar2(ptr readonly [[P2P_I]], ptr readnone [[PP_I]])
; CHECK-NEXT: ret void
;
call void @foo2_through_obj(ptr readnone %p, ptr readonly %p)
@@ -224,7 +224,7 @@ define void @prop_param_callbase_def_2x_incompat(ptr %p, ptr %p2) {
define void @prop_param_callbase_def_2x_incompat_2(ptr %p, ptr %p2) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_callbase_def_2x_incompat_2
; CHECK-SAME: (ptr [[P:%.*]], ptr [[P2:%.*]]) {
-; CHECK-NEXT: call void @bar2(ptr [[P]], ptr [[P]])
+; CHECK-NEXT: call void @bar2(ptr readonly [[P]], ptr readonly [[P]])
; CHECK-NEXT: ret void
;
call void @foo2(ptr readonly %p, ptr readnone %p)
@@ -234,7 +234,7 @@ define void @prop_param_callbase_def_2x_incompat_2(ptr %p, ptr %p2) {
define void @prop_param_callbase_def_2x_incompat_3(ptr %p, ptr %p2) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_callbase_def_2x_incompat_3
; CHECK-SAME: (ptr [[P:%.*]], ptr [[P2:%.*]]) {
-; CHECK-NEXT: call void @bar2(ptr [[P]], ptr [[P]])
+; CHECK-NEXT: call void @bar2(ptr readnone [[P]], ptr readnone [[P]])
; CHECK-NEXT: ret void
;
call void @foo2_2(ptr readonly %p, ptr readnone %p)
@@ -244,7 +244,7 @@ define void @prop_param_callbase_def_2x_incompat_3(ptr %p, ptr %p2) {
define void @prop_param_callbase_def_1x_partial(ptr %p, ptr %p2) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_callbase_def_1x_partial
; CHECK-SAME: (ptr [[P:%.*]], ptr [[P2:%.*]]) {
-; CHECK-NEXT: call void @bar2(ptr [[P]], ptr [[P]])
+; CHECK-NEXT: call void @bar2(ptr readonly [[P]], ptr readonly [[P]])
; CHECK-NEXT: ret void
;
call void @foo2(ptr readonly %p, ptr %p)
@@ -264,7 +264,7 @@ define void @prop_param_callbase_def_1x_partial_2(ptr %p, ptr %p2) {
define void @prop_param_callbase_def_1x_partial_3(ptr %p, ptr %p2) {
; CHECK-LABEL: define {{[^@]+}}@prop_param_callbase_def_1x_partial_3
; CHECK-SAME: (ptr [[P:%.*]], ptr [[P2:%.*]]) {
-; CHECK-NEXT: call void @bar2(ptr [[P]], ptr [[P]])
+; CHECK-NEXT: call void @bar2(ptr readonly [[P]], ptr readnone [[P]])
; CHECK-NEXT: ret void
;
call void @foo2_3(ptr readonly %p, ptr %p)
@@ -344,7 +344,7 @@ define void @prop_param_deref_or_null_no_update(ptr %p) {
define void @prop_fn_decl(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_fn_decl
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: call void @bar1(ptr [[P]])
+; CHECK-NEXT: call void @bar1(ptr [[P]]) #[[ATTR0]]
; CHECK-NEXT: call void @bar1(ptr [[P]])
; CHECK-NEXT: ret void
;
@@ -356,7 +356,7 @@ define void @prop_fn_decl(ptr %p) {
define void @prop_cb_def_wr(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_cb_def_wr
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: call void @bar1(ptr [[P]])
+; CHECK-NEXT: call void @bar1(ptr [[P]]) #[[ATTR0]]
; CHECK-NEXT: call void @bar1(ptr [[P]])
; CHECK-NEXT: ret void
;
@@ -400,7 +400,7 @@ define void @prop_fn_decl_partially_okay_alloca(ptr %p) {
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: [[A_I:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[A_I]])
-; CHECK-NEXT: call void @bar1(ptr [[P]])
+; CHECK-NEXT: call void @bar1(ptr [[P]]) #[[ATTR0]]
; CHECK-NEXT: call void @bar2(ptr [[P]], ptr [[A_I]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[A_I]])
; CHECK-NEXT: call void @bar1(ptr [[P]])
@@ -416,7 +416,7 @@ define void @prop_cb_def_wr_partially_okay_alloca(ptr %p) {
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT: [[A_I:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[A_I]])
-; CHECK-NEXT: call void @bar1(ptr [[P]])
+; CHECK-NEXT: call void @bar1(ptr [[P]]) #[[ATTR0]]
; CHECK-NEXT: call void @bar2(ptr [[P]], ptr [[A_I]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[A_I]])
; CHECK-NEXT: call void @bar1(ptr [[P]])
@@ -430,7 +430,7 @@ define void @prop_cb_def_wr_partially_okay_alloca(ptr %p) {
define void @prop_cb_def_readonly(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_cb_def_readonly
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: call void @bar1(ptr [[P]])
+; CHECK-NEXT: call void @bar1(ptr [[P]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: ret void
;
call void @foo1(ptr %p) readonly
@@ -440,7 +440,7 @@ define void @prop_cb_def_readonly(ptr %p) {
define void @prop_cb_def_readnone(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_cb_def_readnone
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: call void @bar1(ptr [[P]])
+; CHECK-NEXT: call void @bar1(ptr [[P]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: ret void
;
call void @foo1(ptr %p) readnone
@@ -450,7 +450,7 @@ define void @prop_cb_def_readnone(ptr %p) {
define void @prop_cb_def_argmem_readonly_fail(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_cb_def_argmem_readonly_fail
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: call void @bar1(ptr [[P]])
+; CHECK-NEXT: call void @bar1(ptr [[P]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: ret void
;
call void @foo1(ptr %p) memory(argmem:read)
@@ -460,7 +460,7 @@ define void @prop_cb_def_argmem_readonly_fail(ptr %p) {
define void @prop_cb_def_inaccessible_none(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_cb_def_inaccessible_none
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: call void @bar1(ptr [[P]])
+; CHECK-NEXT: call void @bar1(ptr [[P]]) #[[ATTR3]]
; CHECK-NEXT: ret void
;
call void @foo1(ptr %p) memory(inaccessiblemem:none)
@@ -470,7 +470,7 @@ define void @prop_cb_def_inaccessible_none(ptr %p) {
define void @prop_cb_def_inaccessible_none_argmem_none(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@prop_cb_def_inaccessible_none_argmem_none
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: call void @bar1(ptr [[P]])
+; CHECK-NEXT: call void @bar1(ptr [[P]]) #[[ATTR3]]
; CHECK-NEXT: ret void
;
call void @foo1(ptr %p) memory(inaccessiblemem:none, argmem:none)
diff --git a/llvm/test/Transforms/Inline/byval.ll b/llvm/test/Transforms/Inline/byval.ll
index dd5be40b90a8f2a..02ce5f25b01eb34 100644
--- a/llvm/test/Transforms/Inline/byval.ll
+++ b/llvm/test/Transforms/Inline/byval.ll
@@ -131,7 +131,7 @@ define i32 @test4() nounwind {
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 64
-; CHECK-NEXT: call void @g3(ptr [[S]]) #[[ATTR0]]
+; CHECK-NEXT: call void @g3(ptr [[S]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: ret i32 4
;
entry:
diff --git a/llvm/test/Transforms/Inline/noalias-calls-always.ll b/llvm/test/Transforms/Inline/noalias-calls-always.ll
index 9c851b932783928..0c897dd8dc540e0 100644
--- a/llvm/test/Transforms/Inline/noalias-calls-always.ll
+++ b/llvm/test/Transforms/Inline/noalias-calls-always.ll
@@ -35,10 +35,10 @@ define void @foo(ptr nocapture %a, ptr nocapture readonly %c, ptr nocapture %b)
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 512, ptr [[L_I]])
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A:%.*]], ptr align 16 [[B:%.*]], i64 16, i1 false), !noalias !3
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[B]], ptr align 16 [[C:%.*]], i64 16, i1 false), !noalias !0
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[A]],...
[truncated]
|
You can test this locally with the following command:
git-clang-format --diff 6da4ecdf9285225ccc8fa4441b7e9f65e8f4f49c b2ba33f77f2165f5ffa626aa40e67be6762be138 -- llvm/include/llvm/Support/ModRef.h llvm/lib/Transforms/Utils/InlineFunction.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index addcc20e41..58dfeaa9ea 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -30,8 +30,8 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
-#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Argument.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
|
f8c9f5b
to
54a4d61
Compare
Note on compile-time effect: https://llvm-compile-time-tracker.com/compare.php?from=2da4960f20f7e5d88a68ce25636a895284dc66d8&to=f8c9f5bce65756598da22e8aec5d91fb66b16d5c&stat=instructions%3Au The impact is minimal for normal O3 and a bit more significant for LTO. |
ping. |
…nlining To avoid losing information, we can propagate some access attributes from the to-be-inlined callee to its callsites. This patch is conservative and only does so for callsites that have no preceding alloca, as memory access attributes don't apply to allocas. Assuming no preceding allocas, we can directly add memory access attributes for `other` and `inaccessible` memory to callsites. We cannot, however, blindly add `argmem` attributes, as the callsite may have different arguments (a follow-up patch could add them if the underlying objects of all the callsite's arguments are also arguments to the callee).
…nlining To avoid losing information, we can propagate some access attributes from the to-be-inlined callee to its callsites. We can propagate argument memory access attributes to callsite parameters if they are from the same underlying object.
54a4d61
to
b2ba33f
Compare
for (BasicBlock &BB : *CalledFunction) { | ||
for (Instruction &Ins : BB) { | ||
CallBase *InnerCB = dyn_cast<CallBase>(&Ins); | ||
if (InnerCB != nullptr) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (auto *InnerCB = dyn_cast<CallBase>(&Ins))
// Check if the underlying value for the parameter is an argument. | ||
const Value *UnderlyingV = | ||
getUnderlyingObject(InnerCB->getArgOperand(I)); | ||
if (const Argument *Arg = dyn_cast<Argument>(UnderlyingV)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (auto *Arg = dyn_cast<Argument>(UnderlyingV)) {
} | ||
|
||
auto BBInfo = FirstAllocaAndCBs->find(BB); | ||
assert(BBInfo != FirstAllocaAndCBs->end()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(style) assert message
auto PBBInfo = GetBBAllocaAndCallsiteInfo(PBB, FirstAllocaAndCBs, ME); | ||
if (PBBInfo.first) { | ||
auto BBInfo = FirstAllocaAndCBs->find(BB); | ||
assert(BBInfo != FirstAllocaAndCBs->end()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(style) assert message
if (BBInfo.second.empty()) | ||
continue; | ||
for (CallBase *OtherCB : BBInfo.second) { | ||
assert(OtherCB->getParent() == &BB); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(style) assert message
Dropping this. |