Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion compiler/rustc_codegen_cranelift/src/driver/aot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ use cranelift_object::{ObjectBuilder, ObjectModule};
use rustc_codegen_ssa::assert_module_sources::CguReuse;
use rustc_codegen_ssa::back::link::ensure_removed;
use rustc_codegen_ssa::base::determine_cgu_reuse;
use rustc_codegen_ssa::{CodegenResults, CompiledModule, CrateInfo, errors as ssa_errors};
use rustc_codegen_ssa::{
CodegenResults, CompiledModule, CrateInfo, ModuleKind, errors as ssa_errors,
};
use rustc_data_structures::profiling::SelfProfilerRef;
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::sync::{IntoDynSyncSend, par_map};
Expand Down Expand Up @@ -361,6 +363,7 @@ fn emit_cgu(
invocation_temp,
prof,
product.object,
ModuleKind::Regular,
name.clone(),
producer,
)?;
Expand All @@ -369,6 +372,7 @@ fn emit_cgu(
module_regular,
module_global_asm: global_asm_object_file.map(|global_asm_object_file| CompiledModule {
name: format!("{name}.asm"),
kind: ModuleKind::Regular,
object: Some(global_asm_object_file),
dwarf_object: None,
bytecode: None,
Expand All @@ -385,6 +389,7 @@ fn emit_module(
invocation_temp: Option<&str>,
prof: &SelfProfilerRef,
mut object: cranelift_object::object::write::Object<'_>,
kind: ModuleKind,
name: String,
producer_str: &str,
) -> Result<CompiledModule, String> {
Expand Down Expand Up @@ -425,6 +430,7 @@ fn emit_module(

Ok(CompiledModule {
name,
kind,
object: Some(tmp_file),
dwarf_object: None,
bytecode: None,
Expand Down Expand Up @@ -479,6 +485,7 @@ fn reuse_workproduct_for_cgu(
Ok(ModuleCodegenResult {
module_regular: CompiledModule {
name: cgu.name().to_string(),
kind: ModuleKind::Regular,
object: Some(obj_out_regular),
dwarf_object: None,
bytecode: None,
Expand All @@ -488,6 +495,7 @@ fn reuse_workproduct_for_cgu(
},
module_global_asm: source_file_global_asm.map(|source_file| CompiledModule {
name: cgu.name().to_string(),
kind: ModuleKind::Regular,
object: Some(obj_out_global_asm),
dwarf_object: None,
bytecode: None,
Expand Down Expand Up @@ -643,6 +651,7 @@ fn emit_allocator_module(tcx: TyCtxt<'_>) -> Option<CompiledModule> {
tcx.sess.invocation_temp.as_deref(),
&tcx.sess.prof,
product.object,
ModuleKind::Allocator,
"allocator_shim".to_owned(),
&crate::debuginfo::producer(tcx.sess),
) {
Expand Down
8 changes: 7 additions & 1 deletion compiler/rustc_codegen_llvm/src/back/lto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use object::{Object, ObjectSection};
use rustc_codegen_ssa::back::lto::{SerializedModule, ThinModule, ThinShared};
use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput};
use rustc_codegen_ssa::traits::*;
use rustc_codegen_ssa::{ModuleCodegen, looks_like_rust_object_file};
use rustc_codegen_ssa::{ModuleCodegen, ModuleKind, looks_like_rust_object_file};
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::memmap::Mmap;
use rustc_errors::DiagCtxtHandle;
Expand Down Expand Up @@ -225,9 +225,15 @@ fn fat_lto(
// All the other modules will be serialized and reparsed into the new
// context, so this hopefully avoids serializing and parsing the largest
// codegen unit.
//
// Additionally use a regular module as the base here to ensure that various
// file copy operations in the backend work correctly. The only other kind
// of module here should be an allocator one, and if your crate is smaller
// than the allocator module then the size doesn't really matter anyway.
let costliest_module = in_memory
.iter()
.enumerate()
.filter(|&(_, module)| module.kind == ModuleKind::Regular)
.map(|(i, module)| {
let cost = unsafe { llvm::LLVMRustModuleCost(module.module_llvm.llmod()) };
(cost, i)
Expand Down
14 changes: 0 additions & 14 deletions compiler/rustc_codegen_ssa/src/back/lto.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use std::ffi::CString;
use std::sync::Arc;

use rustc_ast::expand::allocator::AllocatorKind;
use rustc_data_structures::memmap::Mmap;
use rustc_hir::def_id::{CrateNum, LOCAL_CRATE};
use rustc_middle::middle::exported_symbols::{ExportedSymbol, SymbolExportInfo, SymbolExportLevel};
Expand Down Expand Up @@ -96,19 +95,6 @@ pub(super) fn exported_symbols_for_lto(
.filter_map(|&(s, info): &(ExportedSymbol<'_>, SymbolExportInfo)| {
if info.level.is_below_threshold(export_threshold) || info.used {
Some(symbol_name_for_instance_in_crate(tcx, s, cnum))
} else if export_threshold == SymbolExportLevel::C
&& info.rustc_std_internal_symbol
&& let Some(AllocatorKind::Default) = allocator_kind_for_codegen(tcx)
{
// Export the __rdl_* exports for usage by the allocator shim when not using
// #[global_allocator]. Most of the conditions above are only used to avoid
// unnecessary expensive symbol_name_for_instance_in_crate calls.
let sym = symbol_name_for_instance_in_crate(tcx, s, cnum);
if sym.contains("__rdl_") || sym.contains("__rg_oom") {
Some(sym)
} else {
None
}
} else {
None
}
Expand Down
43 changes: 32 additions & 11 deletions compiler/rustc_codegen_ssa/src/back/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,7 @@ pub struct CodegenContext<B: WriteBackendMethods> {
pub output_filenames: Arc<OutputFilenames>,
pub invocation_temp: Option<String>,
pub module_config: Arc<ModuleConfig>,
pub allocator_config: Arc<ModuleConfig>,
pub tm_factory: TargetMachineFactoryFn<B>,
pub msvc_imps_needed: bool,
pub is_pe_coff: bool,
Expand Down Expand Up @@ -489,7 +490,7 @@ fn copy_all_cgu_workproducts_to_incr_comp_cache_dir(

let _timer = sess.timer("copy_all_cgu_workproducts_to_incr_comp_cache_dir");

for module in &compiled_modules.modules {
for module in compiled_modules.modules.iter().filter(|m| m.kind == ModuleKind::Regular) {
let mut files = Vec::new();
if let Some(object_file_path) = &module.object {
files.push((OutputType::Object.extension(), object_file_path.as_path()));
Expand Down Expand Up @@ -794,12 +795,19 @@ pub(crate) fn compute_per_cgu_lto_type(
sess_lto: &Lto,
opts: &config::Options,
sess_crate_types: &[CrateType],
module_kind: ModuleKind,
) -> ComputedLtoType {
// If the linker does LTO, we don't have to do it. Note that we
// keep doing full LTO, if it is requested, so as not to break the
// assumption that the output will be a single module.
let linker_does_lto = opts.cg.linker_plugin_lto.enabled();

// When we're automatically doing ThinLTO for multi-codegen-unit
// builds we don't actually want to LTO the allocator module if
// it shows up. This is due to various linker shenanigans that
// we'll encounter later.
let is_allocator = module_kind == ModuleKind::Allocator;

// We ignore a request for full crate graph LTO if the crate type
// is only an rlib, as there is no full crate graph to process,
// that'll happen later.
Expand All @@ -811,7 +819,7 @@ pub(crate) fn compute_per_cgu_lto_type(
let is_rlib = matches!(sess_crate_types, [CrateType::Rlib]);

match sess_lto {
Lto::ThinLocal if !linker_does_lto => ComputedLtoType::Thin,
Lto::ThinLocal if !linker_does_lto && !is_allocator => ComputedLtoType::Thin,
Lto::Thin if !linker_does_lto && !is_rlib => ComputedLtoType::Thin,
Lto::Fat if !is_rlib => ComputedLtoType::Fat,
_ => ComputedLtoType::No,
Expand All @@ -825,18 +833,23 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
let dcx = cgcx.create_dcx();
let dcx = dcx.handle();

B::optimize(cgcx, dcx, &mut module, &cgcx.module_config);
let module_config = match module.kind {
ModuleKind::Regular => &cgcx.module_config,
ModuleKind::Allocator => &cgcx.allocator_config,
};

B::optimize(cgcx, dcx, &mut module, module_config);

// After we've done the initial round of optimizations we need to
// decide whether to synchronously codegen this module or ship it
// back to the coordinator thread for further LTO processing (which
// has to wait for all the initial modules to be optimized).

let lto_type = compute_per_cgu_lto_type(&cgcx.lto, &cgcx.opts, &cgcx.crate_types);
let lto_type = compute_per_cgu_lto_type(&cgcx.lto, &cgcx.opts, &cgcx.crate_types, module.kind);

// If we're doing some form of incremental LTO then we need to be sure to
// save our module to disk first.
let bitcode = if cgcx.module_config.emit_pre_lto_bc {
let bitcode = if module_config.emit_pre_lto_bc {
let filename = pre_lto_bitcode_filename(&module.name);
cgcx.incr_comp_session_dir.as_ref().map(|path| path.join(&filename))
} else {
Expand All @@ -845,7 +858,7 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(

match lto_type {
ComputedLtoType::No => {
let module = B::codegen(cgcx, module, &cgcx.module_config);
let module = B::codegen(cgcx, module, module_config);
WorkItemResult::Finished(module)
}
ComputedLtoType::Thin => {
Expand Down Expand Up @@ -947,6 +960,7 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(

WorkItemResult::Finished(CompiledModule {
links_from_incr_cache,
kind: ModuleKind::Regular,
name: module.name,
object,
dwarf_object,
Expand Down Expand Up @@ -1133,6 +1147,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
diag_emitter: shared_emitter.clone(),
output_filenames: Arc::clone(tcx.output_filenames(())),
module_config: regular_config,
allocator_config,
tm_factory: backend.target_machine_factory(tcx.sess, ol, backend_features),
msvc_imps_needed: msvc_imps_needed(tcx),
is_pe_coff: tcx.sess.target.is_like_windows,
Expand All @@ -1147,11 +1162,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
invocation_temp: sess.invocation_temp.clone(),
};

let compiled_allocator_module = allocator_module.map(|mut allocator_module| {
B::optimize(&cgcx, tcx.sess.dcx(), &mut allocator_module, &allocator_config);
B::codegen(&cgcx, allocator_module, &allocator_config)
});

// This is the "main loop" of parallel work happening for parallel codegen.
// It's here that we manage parallelism, schedule work, and work with
// messages coming from clients.
Expand Down Expand Up @@ -1331,6 +1341,17 @@ fn start_executing_work<B: ExtraBackendMethods>(

let mut llvm_start_time: Option<VerboseTimingGuard<'_>> = None;

let compiled_allocator_module = allocator_module.and_then(|allocator_module| {
match execute_optimize_work_item(&cgcx, allocator_module) {
WorkItemResult::Finished(compiled_module) => return Some(compiled_module),
WorkItemResult::NeedsFatLto(fat_lto_input) => needs_fat_lto.push(fat_lto_input),
WorkItemResult::NeedsThinLto(name, thin_buffer) => {
needs_thin_lto.push((name, thin_buffer))
}
}
None
});

// Run the message loop while there's still anything that needs message
// processing. Note that as soon as codegen is aborted we simply want to
// wait for all existing work to finish, so many of the conditions here
Expand Down
11 changes: 9 additions & 2 deletions compiler/rustc_codegen_ssa/src/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ use crate::meth::load_vtable;
use crate::mir::operand::OperandValue;
use crate::mir::place::PlaceRef;
use crate::traits::*;
use crate::{CachedModuleCodegen, CodegenLintLevels, CrateInfo, ModuleCodegen, errors, meth, mir};
use crate::{
CachedModuleCodegen, CodegenLintLevels, CrateInfo, ModuleCodegen, ModuleKind, errors, meth, mir,
};

pub(crate) fn bin_op_to_icmp_predicate(op: BinOp, signed: bool) -> IntPredicate {
match (op, signed) {
Expand Down Expand Up @@ -1124,7 +1126,12 @@ pub fn determine_cgu_reuse<'tcx>(tcx: TyCtxt<'tcx>, cgu: &CodegenUnit<'tcx>) ->
// We can re-use either the pre- or the post-thinlto state. If no LTO is
// being performed then we can use post-LTO artifacts, otherwise we must
// reuse pre-LTO artifacts
match compute_per_cgu_lto_type(&tcx.sess.lto(), &tcx.sess.opts, tcx.crate_types()) {
match compute_per_cgu_lto_type(
&tcx.sess.lto(),
&tcx.sess.opts,
tcx.crate_types(),
ModuleKind::Regular,
) {
ComputedLtoType::No => CguReuse::PostLto,
_ => CguReuse::PreLto,
}
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_codegen_ssa/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ impl<M> ModuleCodegen<M> {

CompiledModule {
name: self.name.clone(),
kind: self.kind,
object,
dwarf_object,
bytecode,
Expand All @@ -133,6 +134,7 @@ impl<M> ModuleCodegen<M> {
#[derive(Debug, Encodable, Decodable)]
pub struct CompiledModule {
pub name: String,
pub kind: ModuleKind,
pub object: Option<PathBuf>,
pub dwarf_object: Option<PathBuf>,
pub bytecode: Option<PathBuf>,
Expand Down
18 changes: 18 additions & 0 deletions tests/ui/lto/lto-global-allocator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
//@ compile-flags: --crate-type cdylib -C lto
//@ build-pass
//@ no-prefer-dynamic

use std::alloc::{GlobalAlloc, Layout};

// Field-less unit struct; it implements `GlobalAlloc` and is installed as the
// `#[global_allocator]` static further down in this file.
struct MyAllocator;

// Stub `GlobalAlloc` implementation: neither method performs any real allocation work.
unsafe impl GlobalAlloc for MyAllocator {
// Unconditionally hits `todo!()`, so any call panics instead of returning memory.
// NOTE(review): presumably acceptable because this file is marked `build-pass` and is
// only expected to compile and link, not to run — confirm.
unsafe fn alloc(&self, _layout: Layout) -> *mut u8 {
todo!()
}

// No-op: both arguments are ignored and nothing is freed.
unsafe fn dealloc(&self, _ptr: *mut u8, _layout: Layout) {}
}

// Registers a `MyAllocator` value as the global allocator for this crate, which the
// file's header directives compile with `--crate-type cdylib -C lto`.
#[global_allocator]
static GLOBAL: MyAllocator = MyAllocator;
Loading