Skip to content

Commit e5cfe0b

Browse files
committed
Auto merge of #128118 - saethlin:closures-can-be-shared, r=<try>
Rework instantiation mode selection in monomorphization This PR is _extremely_ cooked because I'm just dumping ideas into it that seem good and can bootstrap the compiler without linker errors. I don't know if all the ideas are good yet. r? `@ghost`
2 parents 47243b3 + 14b76c9 commit e5cfe0b

File tree

13 files changed

+150
-142
lines changed

13 files changed

+150
-142
lines changed

compiler/rustc_codegen_gcc/src/attributes.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ pub fn from_fn_attrs<'gcc, 'tcx>(
4747
let inline = if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
4848
InlineAttr::Never
4949
} else if codegen_fn_attrs.inline == InlineAttr::None
50-
&& instance.def.requires_inline(cx.tcx)
50+
&& tcx.cross_crate_inlinable(instance.def_id())
5151
{
5252
InlineAttr::Hint
5353
} else {

compiler/rustc_codegen_llvm/src/attributes.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -348,12 +348,13 @@ pub fn from_fn_attrs<'ll, 'tcx>(
348348
OptimizeAttr::Speed => {}
349349
}
350350

351-
let inline =
352-
if codegen_fn_attrs.inline == InlineAttr::None && instance.def.requires_inline(cx.tcx) {
353-
InlineAttr::Hint
354-
} else {
355-
codegen_fn_attrs.inline
356-
};
351+
let inline = if codegen_fn_attrs.inline == InlineAttr::None
352+
&& cx.tcx.cross_crate_inlinable(instance.def_id())
353+
{
354+
InlineAttr::Hint
355+
} else {
356+
codegen_fn_attrs.inline
357+
};
357358
to_add.extend(inline_attr(cx, inline));
358359

359360
// The `uwtable` attribute according to LLVM is:

compiler/rustc_codegen_ssa/src/back/symbol_export.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,7 @@ fn reachable_non_generics_provider(tcx: TyCtxt<'_>, _: LocalCrate) -> DefIdMap<S
9898
// Functions marked with #[inline] are codegened with "internal"
9999
// linkage and are not exported unless marked with an extern
100100
// indicator
101-
if !Instance::mono(tcx, def_id.to_def_id()).def.generates_cgu_internal_copy(tcx)
102-
|| tcx.codegen_fn_attrs(def_id.to_def_id()).contains_extern_indicator()
103-
{
101+
if !Instance::mono(tcx, def_id.to_def_id()).def.generates_cgu_internal_copy(tcx) {
104102
Some(def_id)
105103
} else {
106104
None

compiler/rustc_metadata/src/rmeta/decoder.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1321,7 +1321,11 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
13211321
}
13221322

13231323
fn cross_crate_inlinable(self, id: DefIndex) -> bool {
1324-
self.root.tables.cross_crate_inlinable.get(self, id)
1324+
self.root
1325+
.tables
1326+
.cross_crate_inlinable
1327+
.get(self, id)
1328+
.expect(&format!("No cross_crate_inlinable for {:?}", id))
13251329
}
13261330

13271331
fn get_fn_has_self_parameter(self, id: DefIndex, sess: &'a Session) -> bool {

compiler/rustc_metadata/src/rmeta/encoder.rs

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1656,19 +1656,50 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
16561656
let tcx = self.tcx;
16571657
let reachable_set = tcx.reachable_set(());
16581658

1659-
let keys_and_jobs = tcx.mir_keys(()).iter().filter_map(|&def_id| {
1659+
let lang_items = tcx.lang_items();
1660+
for (trait_def_id, symbol) in [
1661+
(lang_items.clone_trait(), sym::clone),
1662+
(lang_items.fn_once_trait(), sym::call_once),
1663+
(lang_items.fn_mut_trait(), sym::call_mut),
1664+
(lang_items.fn_trait(), sym::call),
1665+
] {
1666+
if let Some(trait_def_id) = trait_def_id {
1667+
let fn_def_id = tcx
1668+
.associated_items(trait_def_id)
1669+
.filter_by_name_unhygienic(symbol)
1670+
.next()
1671+
.unwrap()
1672+
.def_id;
1673+
if fn_def_id.is_local() {
1674+
self.tables
1675+
.cross_crate_inlinable
1676+
.set(fn_def_id.index, Some(self.tcx.cross_crate_inlinable(fn_def_id)));
1677+
}
1678+
}
1679+
}
1680+
1681+
for (symbol, _) in tcx.exported_symbols(LOCAL_CRATE) {
1682+
use crate::rmeta::ExportedSymbol::*;
1683+
let (NonGeneric(def_id) | Generic(def_id, _) | ThreadLocalShim(def_id)) = symbol else {
1684+
continue;
1685+
};
1686+
self.tables.cross_crate_inlinable.set(def_id.index, Some(false));
1687+
}
1688+
1689+
for def_id in tcx.mir_keys(()).iter().copied() {
1690+
self.tables
1691+
.cross_crate_inlinable
1692+
.set(def_id.to_def_id().index, Some(self.tcx.cross_crate_inlinable(def_id)));
16601693
let (encode_const, encode_opt) = should_encode_mir(tcx, reachable_set, def_id);
1661-
if encode_const || encode_opt { Some((def_id, encode_const, encode_opt)) } else { None }
1662-
});
1663-
for (def_id, encode_const, encode_opt) in keys_and_jobs {
1694+
if encode_const || encode_opt {
1695+
} else {
1696+
continue;
1697+
}
16641698
debug_assert!(encode_const || encode_opt);
16651699

16661700
debug!("EntryBuilder::encode_mir({:?})", def_id);
16671701
if encode_opt {
16681702
record!(self.tables.optimized_mir[def_id.to_def_id()] <- tcx.optimized_mir(def_id));
1669-
self.tables
1670-
.cross_crate_inlinable
1671-
.set(def_id.to_def_id().index, self.tcx.cross_crate_inlinable(def_id));
16721703
record!(self.tables.closure_saved_names_of_captured_variables[def_id.to_def_id()]
16731704
<- tcx.closure_saved_names_of_captured_variables(def_id));
16741705

@@ -1710,6 +1741,10 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
17101741
self.tables.unused_generic_params.set(def_id.local_def_index, unused);
17111742
}
17121743

1744+
if let Some(def_id) = tcx.lang_items().drop_in_place_fn() {
1745+
self.tables.cross_crate_inlinable.set(def_id.index, Some(false));
1746+
}
1747+
17131748
// Encode all the deduced parameter attributes for everything that has MIR, even for items
17141749
// that can't be inlined. But don't if we aren't optimizing in non-incremental mode, to
17151750
// save the query traffic.

compiler/rustc_metadata/src/rmeta/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,7 @@ define_tables! {
400400
// That's why the encoded list needs to contain `ModChild` structures describing all the names
401401
// individually instead of `DefId`s.
402402
module_children_reexports: Table<DefIndex, LazyArray<ModChild>>,
403-
cross_crate_inlinable: Table<DefIndex, bool>,
403+
cross_crate_inlinable: Table<DefIndex, Option<bool>>,
404404

405405
- optional:
406406
attributes: Table<DefIndex, LazyArray<ast::Attribute>>,

compiler/rustc_middle/src/mir/mono.rs

Lines changed: 8 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
use crate::dep_graph::{DepNode, WorkProduct, WorkProductId};
22
use crate::ty::{GenericArgs, Instance, InstanceKind, SymbolName, TyCtxt};
3-
use rustc_attr::InlineAttr;
43
use rustc_data_structures::base_n::BaseNString;
54
use rustc_data_structures::base_n::ToBaseN;
65
use rustc_data_structures::base_n::CASE_INSENSITIVE;
@@ -13,7 +12,6 @@ use rustc_hir::ItemId;
1312
use rustc_index::Idx;
1413
use rustc_macros::{HashStable, TyDecodable, TyEncodable};
1514
use rustc_query_system::ich::StableHashingContext;
16-
use rustc_session::config::OptLevel;
1715
use rustc_span::symbol::Symbol;
1816
use rustc_span::Span;
1917
use std::fmt;
@@ -105,41 +103,19 @@ impl<'tcx> MonoItem<'tcx> {
105103
}
106104

107105
pub fn instantiation_mode(&self, tcx: TyCtxt<'tcx>) -> InstantiationMode {
108-
let generate_cgu_internal_copies = tcx
109-
.sess
110-
.opts
111-
.unstable_opts
112-
.inline_in_all_cgus
113-
.unwrap_or_else(|| tcx.sess.opts.optimize != OptLevel::No)
114-
&& !tcx.sess.link_dead_code();
115-
106+
if tcx.sess.link_dead_code() {
107+
return InstantiationMode::GloballyShared { may_conflict: false };
108+
}
116109
match *self {
117110
MonoItem::Fn(ref instance) => {
118111
let entry_def_id = tcx.entry_fn(()).map(|(id, _)| id);
119-
// If this function isn't inlined or otherwise has an extern
120-
// indicator, then we'll be creating a globally shared version.
121-
if tcx.codegen_fn_attrs(instance.def_id()).contains_extern_indicator()
122-
|| !instance.def.generates_cgu_internal_copy(tcx)
123-
|| Some(instance.def_id()) == entry_def_id
124-
{
112+
if Some(instance.def_id()) == entry_def_id {
125113
return InstantiationMode::GloballyShared { may_conflict: false };
126114
}
127-
128-
// At this point we don't have explicit linkage and we're an
129-
// inlined function. If we're inlining into all CGUs then we'll
130-
// be creating a local copy per CGU.
131-
if generate_cgu_internal_copies {
132-
return InstantiationMode::LocalCopy;
133-
}
134-
135-
// Finally, if this is `#[inline(always)]` we're sure to respect
136-
// that with an inline copy per CGU, but otherwise we'll be
137-
// creating one copy of this `#[inline]` function which may
138-
// conflict with upstream crates as it could be an exported
139-
// symbol.
140-
match tcx.codegen_fn_attrs(instance.def_id()).inline {
141-
InlineAttr::Always => InstantiationMode::LocalCopy,
142-
_ => InstantiationMode::GloballyShared { may_conflict: true },
115+
if tcx.cross_crate_inlinable(instance.def_id()) {
116+
InstantiationMode::LocalCopy
117+
} else {
118+
InstantiationMode::GloballyShared { may_conflict: false }
143119
}
144120
}
145121
MonoItem::Static(..) | MonoItem::GlobalAsm(..) => {

compiler/rustc_middle/src/query/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2285,7 +2285,7 @@ rustc_queries! {
22852285
}
22862286

22872287
query cross_crate_inlinable(def_id: DefId) -> bool {
2288-
desc { "whether the item should be made inlinable across crates" }
2288+
desc { |tcx| "deciding whether `{}` should be inlinable across crates", tcx.def_path_str(key) }
22892289
separate_provide_extern
22902290
}
22912291
}

compiler/rustc_middle/src/ty/instance.rs

Lines changed: 0 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -286,26 +286,6 @@ impl<'tcx> InstanceKind<'tcx> {
286286
tcx.get_attrs(self.def_id(), attr)
287287
}
288288

289-
/// Returns `true` if the LLVM version of this instance is unconditionally
290-
/// marked with `inline`. This implies that a copy of this instance is
291-
/// generated in every codegen unit.
292-
/// Note that this is only a hint. See the documentation for
293-
/// `generates_cgu_internal_copy` for more information.
294-
pub fn requires_inline(&self, tcx: TyCtxt<'tcx>) -> bool {
295-
use rustc_hir::definitions::DefPathData;
296-
let def_id = match *self {
297-
ty::InstanceKind::Item(def) => def,
298-
ty::InstanceKind::DropGlue(_, Some(_)) => return false,
299-
ty::InstanceKind::AsyncDropGlueCtorShim(_, Some(_)) => return false,
300-
ty::InstanceKind::ThreadLocalShim(_) => return false,
301-
_ => return true,
302-
};
303-
matches!(
304-
tcx.def_key(def_id).disambiguated_data.data,
305-
DefPathData::Ctor | DefPathData::Closure
306-
)
307-
}
308-
309289
/// Returns `true` if the machine code for this instance is instantiated in
310290
/// each codegen unit that references it.
311291
/// Note that this is only a hint! The compiler can globally decide to *not*
@@ -314,39 +294,6 @@ impl<'tcx> InstanceKind<'tcx> {
314294
/// `-Copt-level=0`) then the time for generating them is wasted and it's
315295
/// better to create a single copy with external linkage.
316296
pub fn generates_cgu_internal_copy(&self, tcx: TyCtxt<'tcx>) -> bool {
317-
if self.requires_inline(tcx) {
318-
return true;
319-
}
320-
if let ty::InstanceKind::DropGlue(.., Some(ty))
321-
| ty::InstanceKind::AsyncDropGlueCtorShim(.., Some(ty)) = *self
322-
{
323-
// Drop glue generally wants to be instantiated at every codegen
324-
// unit, but without an #[inline] hint. We should make this
325-
// available to normal end-users.
326-
if tcx.sess.opts.incremental.is_none() {
327-
return true;
328-
}
329-
// When compiling with incremental, we can generate a *lot* of
330-
// codegen units. Including drop glue into all of them has a
331-
// considerable compile time cost.
332-
//
333-
// We include enums without destructors to allow, say, optimizing
334-
// drops of `Option::None` before LTO. We also respect the intent of
335-
// `#[inline]` on `Drop::drop` implementations.
336-
return ty.ty_adt_def().map_or(true, |adt_def| {
337-
match *self {
338-
ty::InstanceKind::DropGlue(..) => adt_def.destructor(tcx).map(|dtor| dtor.did),
339-
ty::InstanceKind::AsyncDropGlueCtorShim(..) => {
340-
adt_def.async_destructor(tcx).map(|dtor| dtor.ctor)
341-
}
342-
_ => unreachable!(),
343-
}
344-
.map_or_else(|| adt_def.is_enum(), |did| tcx.cross_crate_inlinable(did))
345-
});
346-
}
347-
if let ty::InstanceKind::ThreadLocalShim(..) = *self {
348-
return false;
349-
}
350297
tcx.cross_crate_inlinable(self.def_id())
351298
}
352299

compiler/rustc_mir_transform/src/cross_crate_inline.rs

Lines changed: 50 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use crate::pass_manager as pm;
33
use rustc_attr::InlineAttr;
44
use rustc_hir::def::DefKind;
55
use rustc_hir::def_id::LocalDefId;
6+
use rustc_hir::LangItem;
67
use rustc_middle::mir::visit::Visitor;
78
use rustc_middle::mir::*;
89
use rustc_middle::query::Providers;
@@ -16,24 +17,34 @@ pub fn provide(providers: &mut Providers) {
1617
}
1718

1819
fn cross_crate_inlinable(tcx: TyCtxt<'_>, def_id: LocalDefId) -> bool {
20+
// Bail quickly for DefIds that wouldn't be inlinable anyway, such as statics.
21+
if !matches!(
22+
tcx.def_kind(def_id),
23+
DefKind::Fn | DefKind::AssocFn | DefKind::Closure | DefKind::Ctor(..)
24+
) {
25+
return false;
26+
}
27+
28+
// The program entrypoint is never inlinable in any sense.
29+
if let Some((entry_fn, _)) = tcx.entry_fn(()) {
30+
if def_id == entry_fn.expect_local() {
31+
return false;
32+
}
33+
}
34+
1935
let codegen_fn_attrs = tcx.codegen_fn_attrs(def_id);
2036
// If this has an extern indicator, then this function is globally shared and thus will not
2137
// generate cgu-internal copies which would make it cross-crate inlinable.
2238
if codegen_fn_attrs.contains_extern_indicator() {
2339
return false;
2440
}
2541

26-
// This just reproduces the logic from Instance::requires_inline.
27-
match tcx.def_kind(def_id) {
28-
DefKind::Ctor(..) | DefKind::Closure => return true,
29-
DefKind::Fn | DefKind::AssocFn => {}
30-
_ => return false,
31-
}
32-
33-
// From this point on, it is valid to return true or false.
34-
if tcx.sess.opts.unstable_opts.cross_crate_inline_threshold == InliningThreshold::Always {
35-
return true;
36-
}
42+
// From this point on, it is technically valid to return true or false.
43+
let threshold = match tcx.sess.opts.unstable_opts.cross_crate_inline_threshold {
44+
InliningThreshold::Always => return true,
45+
InliningThreshold::Sometimes(threshold) => threshold,
46+
InliningThreshold::Never => return false,
47+
};
3748

3849
if tcx.has_attr(def_id, sym::rustc_intrinsic) {
3950
// Intrinsic fallback bodies are always cross-crate inlineable.
@@ -43,42 +54,55 @@ fn cross_crate_inlinable(tcx: TyCtxt<'_>, def_id: LocalDefId) -> bool {
4354
return true;
4455
}
4556

57+
// Don't do any inference when incremental compilation is enabled; the additional inlining that
58+
// inference permits also creates more work for small edits.
59+
if tcx.sess.opts.incremental.is_some() {
60+
return false;
61+
}
62+
4663
// Obey source annotations first; this is important because it means we can use
4764
// #[inline(never)] to force code generation.
4865
match codegen_fn_attrs.inline {
4966
InlineAttr::Never => return false,
5067
InlineAttr::Hint | InlineAttr::Always => return true,
51-
_ => {}
52-
}
53-
54-
// Don't do any inference when incremental compilation is enabled; the additional inlining that
55-
// inference permits also creates more work for small edits.
56-
if tcx.sess.opts.incremental.is_some() {
57-
return false;
68+
InlineAttr::None => {}
5869
}
5970

6071
// Don't do any inference if codegen optimizations are disabled and also MIR inlining is not
6172
// enabled. This ensures that we do inference even if someone only passes -Zinline-mir,
6273
// which is less confusing than having to also enable -Copt-level=1.
6374
if matches!(tcx.sess.opts.optimize, OptLevel::No) && !pm::should_run_pass(tcx, &inline::Inline)
6475
{
65-
return false;
76+
if !matches!(tcx.def_kind(def_id), DefKind::Ctor(..)) {
77+
return false;
78+
}
6679
}
6780

68-
if !tcx.is_mir_available(def_id) {
69-
return false;
81+
if tcx.is_lang_item(def_id.into(), LangItem::DropInPlace)
82+
|| tcx.is_lang_item(def_id.into(), LangItem::AsyncDropInPlace)
83+
{
84+
return true;
7085
}
7186

72-
let threshold = match tcx.sess.opts.unstable_opts.cross_crate_inline_threshold {
73-
InliningThreshold::Always => return true,
74-
InliningThreshold::Sometimes(threshold) => threshold,
75-
InliningThreshold::Never => return false,
76-
};
87+
// If there is no MIR for the DefId, we can't analyze the body. But also, this only arises in
88+
// two relevant cases: extern functions and MIR shims. So here we recognize the MIR shims by a
89+
// DefId that has no MIR and whose parent is one of the shimmed traits.
90+
// Everything else is extern functions, and thus not a candidate for inlining.
91+
if !tcx.is_mir_available(def_id) {
92+
let parent = tcx.parent(def_id.into());
93+
match tcx.lang_items().from_def_id(parent.into()) {
94+
Some(LangItem::Clone | LangItem::FnOnce | LangItem::Fn | LangItem::FnMut) => {
95+
return true;
96+
}
97+
_ => return false,
98+
}
99+
}
77100

78101
let mir = tcx.optimized_mir(def_id);
79102
let mut checker =
80103
CostChecker { tcx, callee_body: mir, calls: 0, statements: 0, landing_pads: 0, resumes: 0 };
81104
checker.visit_body(mir);
105+
82106
checker.calls == 0
83107
&& checker.resumes == 0
84108
&& checker.landing_pads == 0

0 commit comments

Comments
 (0)