From b1d5922285b41259bd4a4b1b788ec679f1781ea9 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Thu, 4 Sep 2025 09:36:27 +0000 Subject: [PATCH 1/5] Move desc out of WorkItem::short_description to allow reusing in a future commit --- compiler/rustc_codegen_ssa/src/back/write.rs | 78 ++++++++++---------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index cbaf67d734547..931dc96aaa8e5 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -714,48 +714,48 @@ pub(crate) enum WorkItem { ThinLto(lto::ThinModule), } +// `pthread_setname()` on *nix ignores anything beyond the first 15 +// bytes. Use short descriptions to maximize the space available for +// the module name. +#[cfg(not(windows))] +fn desc(short: &str, _long: &str, name: &str) -> String { + // The short label is three bytes, and is followed by a space. That + // leaves 11 bytes for the CGU name. How we obtain those 11 bytes + // depends on the CGU name form. + // + // - Non-incremental, e.g. `regex.f10ba03eb5ec7975-cgu.0`: the part + // before the `-cgu.0` is the same for every CGU, so use the + // `cgu.0` part. The number suffix will be different for each + // CGU. + // + // - Incremental (normal), e.g. `2i52vvl2hco29us0`: use the whole + // name because each CGU will have a unique ASCII hash, and the + // first 11 bytes will be enough to identify it. + // + // - Incremental (with `-Zhuman-readable-cgu-names`), e.g. + // `regex.f10ba03eb5ec7975-re_builder.volatile`: use the whole + // name. The first 11 bytes won't be enough to uniquely identify + // it, but no obvious substring will, and this is a rarely used + // option so it doesn't matter much. + // + assert_eq!(short.len(), 3); + let name = if let Some(index) = name.find("-cgu.") { + &name[index + 1..] // +1 skips the leading '-'. + } else { + name + }; + format!("{short} {name}") +} + +// Windows has no thread name length limit, so use more descriptive names. +#[cfg(windows)] +fn desc(_short: &str, long: &str, name: &str) -> String { + format!("{long} {name}") +} + impl WorkItem { /// Generate a short description of this work item suitable for use as a thread name. fn short_description(&self) -> String { - // `pthread_setname()` on *nix ignores anything beyond the first 15 - // bytes. Use short descriptions to maximize the space available for - // the module name. - #[cfg(not(windows))] - fn desc(short: &str, _long: &str, name: &str) -> String { - // The short label is three bytes, and is followed by a space. That - // leaves 11 bytes for the CGU name. How we obtain those 11 bytes - // depends on the CGU name form. - // - // - Non-incremental, e.g. `regex.f10ba03eb5ec7975-cgu.0`: the part - // before the `-cgu.0` is the same for every CGU, so use the - // `cgu.0` part. The number suffix will be different for each - // CGU. - // - // - Incremental (normal), e.g. `2i52vvl2hco29us0`: use the whole - // name because each CGU will have a unique ASCII hash, and the - // first 11 bytes will be enough to identify it. - // - // - Incremental (with `-Zhuman-readable-cgu-names`), e.g. - // `regex.f10ba03eb5ec7975-re_builder.volatile`: use the whole - // name. The first 11 bytes won't be enough to uniquely identify - // it, but no obvious substring will, and this is a rarely used - // option so it doesn't matter much. - // - assert_eq!(short.len(), 3); - let name = if let Some(index) = name.find("-cgu.") { - &name[index + 1..] // +1 skips the leading '-'. - } else { - name - }; - format!("{short} {name}") - } - - // Windows has no thread name length limit, so use more descriptive names. - #[cfg(windows)] - fn desc(_short: &str, long: &str, name: &str) -> String { - format!("{long} {name}") - } - match self { WorkItem::Optimize(m) => desc("opt", "optimize module", &m.name), WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for", &m.name), From a6725ab7b339e23f8cd9a7c5240dc08b89a2eae8 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Sun, 6 Jul 2025 17:57:41 +0000 Subject: [PATCH 2/5] Move fat LTO out of the main coordinator loop --- compiler/rustc_codegen_ssa/src/back/write.rs | 99 +++++++++----------- 1 file changed, 42 insertions(+), 57 deletions(-) diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index 931dc96aaa8e5..b0076c72ccbd2 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -703,13 +703,6 @@ pub(crate) enum WorkItem { /// Copy the post-LTO artifacts from the incremental cache to the output /// directory. CopyPostLtoArtifacts(CachedModuleCodegen), - /// Performs fat LTO on the given module. - FatLto { - exported_symbols_for_lto: Arc>, - each_linked_rlib_for_lto: Vec, - needs_fat_lto: Vec>, - import_only_modules: Vec<(SerializedModule, WorkProduct)>, - }, /// Performs thin-LTO on the given module. ThinLto(lto::ThinModule), } @@ -759,7 +752,6 @@ impl WorkItem { match self { WorkItem::Optimize(m) => desc("opt", "optimize module", &m.name), WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for", &m.name), - WorkItem::FatLto { .. } => desc("lto", "fat LTO module", "everything"), WorkItem::ThinLto(m) => desc("lto", "thin-LTO module", m.name()), } } @@ -976,15 +968,17 @@ fn execute_copy_from_cache_work_item( }) } -fn execute_fat_lto_work_item( +fn do_fat_lto( cgcx: &CodegenContext, exported_symbols_for_lto: &[String], each_linked_rlib_for_lto: &[PathBuf], mut needs_fat_lto: Vec>, import_only_modules: Vec<(SerializedModule, WorkProduct)>, -) -> WorkItemResult { +) -> CompiledModule { let _timer = cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", "everything"); + check_lto_allowed(&cgcx); + for (module, wp) in import_only_modules { needs_fat_lto.push(FatLtoInput::Serialized { name: wp.cgu_name, buffer: module }) } @@ -995,8 +989,7 @@ fn execute_fat_lto_work_item( each_linked_rlib_for_lto, needs_fat_lto, ); - let module = B::codegen(cgcx, module, &cgcx.module_config); - WorkItemResult::Finished(module) + B::codegen(cgcx, module, &cgcx.module_config) } fn execute_thin_lto_work_item( @@ -1421,45 +1414,30 @@ fn start_executing_work( assert!(!started_lto); started_lto = true; - let needs_fat_lto = mem::take(&mut needs_fat_lto); - let needs_thin_lto = mem::take(&mut needs_thin_lto); - let import_only_modules = mem::take(&mut lto_import_only_modules); - let each_linked_rlib_file_for_lto = - mem::take(&mut each_linked_rlib_file_for_lto); + if !needs_fat_lto.is_empty() { + // We're doing fat LTO outside of the main loop. + break; + } check_lto_allowed(&cgcx); - if !needs_fat_lto.is_empty() { - assert!(needs_thin_lto.is_empty()); - - work_items.push(( - WorkItem::FatLto { - exported_symbols_for_lto: Arc::clone(&exported_symbols_for_lto), - each_linked_rlib_for_lto: each_linked_rlib_file_for_lto, - needs_fat_lto, - import_only_modules, - }, - 0, - )); + let needs_thin_lto = mem::take(&mut needs_thin_lto); + let import_only_modules = mem::take(&mut lto_import_only_modules); + + for (work, cost) in generate_thin_lto_work( + &cgcx, + &exported_symbols_for_lto, + &each_linked_rlib_file_for_lto, + needs_thin_lto, + import_only_modules, + ) { + let insertion_index = work_items + .binary_search_by_key(&cost, |&(_, cost)| cost) + .unwrap_or_else(|e| e); + work_items.insert(insertion_index, (work, cost)); if cgcx.parallel { helper.request_token(); } - } else { - for (work, cost) in generate_thin_lto_work( - &cgcx, - &exported_symbols_for_lto, - &each_linked_rlib_file_for_lto, - needs_thin_lto, - import_only_modules, - ) { - let insertion_index = work_items - .binary_search_by_key(&cost, |&(_, cost)| cost) - .unwrap_or_else(|e| e); - work_items.insert(insertion_index, (work, cost)); - if cgcx.parallel { - helper.request_token(); - } - } } } @@ -1633,6 +1611,25 @@ fn start_executing_work( return Err(()); } + drop(codegen_state); + drop(tokens); + drop(helper); + + if !needs_fat_lto.is_empty() { + assert!(compiled_modules.is_empty()); + assert!(needs_thin_lto.is_empty()); + + // This uses the implicit token + let module = do_fat_lto( + &cgcx, + &exported_symbols_for_lto, + &each_linked_rlib_file_for_lto, + needs_fat_lto, + lto_import_only_modules, + ); + compiled_modules.push(module); + } + // Drop to print timings drop(llvm_start_time); @@ -1726,18 +1723,6 @@ fn spawn_work<'a, B: ExtraBackendMethods>( let result = std::panic::catch_unwind(AssertUnwindSafe(|| match work { WorkItem::Optimize(m) => execute_optimize_work_item(&cgcx, m), WorkItem::CopyPostLtoArtifacts(m) => execute_copy_from_cache_work_item(&cgcx, m), - WorkItem::FatLto { - exported_symbols_for_lto, - each_linked_rlib_for_lto, - needs_fat_lto, - import_only_modules, - } => execute_fat_lto_work_item( - &cgcx, - &exported_symbols_for_lto, - &each_linked_rlib_for_lto, - needs_fat_lto, - import_only_modules, - ), WorkItem::ThinLto(m) => execute_thin_lto_work_item(&cgcx, m), })); From 3cf3ec667a656e144ca28fdf4f476ee27c94b3be Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Wed, 9 Jul 2025 15:11:55 +0000 Subject: [PATCH 3/5] Move thin LTO out of the main loop too --- compiler/rustc_codegen_ssa/src/back/write.rs | 217 ++++++++++++++----- 1 file changed, 167 insertions(+), 50 deletions(-) diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index b0076c72ccbd2..262878d6707ee 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -15,8 +15,8 @@ use rustc_data_structures::profiling::{SelfProfilerRef, VerboseTimingGuard}; use rustc_errors::emitter::Emitter; use rustc_errors::translation::Translator; use rustc_errors::{ - Diag, DiagArgMap, DiagCtxt, DiagMessage, ErrCode, FatalErrorMarker, Level, MultiSpan, Style, - Suggestions, + Diag, DiagArgMap, DiagCtxt, DiagMessage, ErrCode, FatalError, FatalErrorMarker, Level, + MultiSpan, Style, Suggestions, }; use rustc_fs_util::link_or_copy; use rustc_incremental::{ @@ -992,6 +992,155 @@ fn do_fat_lto( B::codegen(cgcx, module, &cgcx.module_config) } +fn do_thin_lto<'a, B: ExtraBackendMethods>( + cgcx: &'a CodegenContext, + llvm_start_time: &mut Option>, + exported_symbols_for_lto: Arc>, + each_linked_rlib_for_lto: Vec, + needs_thin_lto: Vec<(String, ::ThinBuffer)>, + lto_import_only_modules: Vec<( + SerializedModule<::ModuleBuffer>, + WorkProduct, + )>, +) -> Vec { + check_lto_allowed(&cgcx); + + let (coordinator_send, coordinator_receive) = channel(); + + // First up, convert our jobserver into a helper thread so we can use normal + // mpsc channels to manage our messages and such. + // After we've requested tokens then we'll, when we can, + // get tokens on `coordinator_receive` which will + // get managed in the main loop below. + let coordinator_send2 = coordinator_send.clone(); + let helper = jobserver::client() + .into_helper_thread(move |token| { + drop(coordinator_send2.send(Message::Token::(token))); + }) + .expect("failed to spawn helper thread"); + + let mut work_items = vec![]; + + // We have LTO work to do. Perform the serial work here of + // figuring out what we're going to LTO and then push a + // bunch of work items onto our queue to do LTO. This all + // happens on the coordinator thread but it's very quick so + // we don't worry about tokens. + for (work, cost) in generate_thin_lto_work( + cgcx, + &exported_symbols_for_lto, + &each_linked_rlib_for_lto, + needs_thin_lto, + lto_import_only_modules, + ) { + let insertion_index = + work_items.binary_search_by_key(&cost, |&(_, cost)| cost).unwrap_or_else(|e| e); + work_items.insert(insertion_index, (work, cost)); + if cgcx.parallel { + helper.request_token(); + } + } + + let mut codegen_aborted = None; + + // These are the Jobserver Tokens we currently hold. Does not include + // the implicit Token the compiler process owns no matter what. + let mut tokens = vec![]; + + // Amount of tokens that are used (including the implicit token). + let mut used_token_count = 0; + + let mut compiled_modules = vec![]; + + // Run the message loop while there's still anything that needs message + // processing. Note that as soon as codegen is aborted we simply want to + // wait for all existing work to finish, so many of the conditions here + // only apply if codegen hasn't been aborted as they represent pending + // work to be done. + loop { + if codegen_aborted.is_none() { + if used_token_count == 0 && work_items.is_empty() { + // All codegen work is done. + break; + } + + // Spin up what work we can, only doing this while we've got available + // parallelism slots and work left to spawn. + while used_token_count < tokens.len() + 1 + && let Some((item, _)) = work_items.pop() + { + spawn_work(&cgcx, coordinator_send.clone(), llvm_start_time, item); + used_token_count += 1; + } + } else { + // Don't queue up any more work if codegen was aborted, we're + // just waiting for our existing children to finish. + if used_token_count == 0 { + break; + } + } + + // Relinquish accidentally acquired extra tokens. Subtract 1 for the implicit token. + tokens.truncate(used_token_count.saturating_sub(1)); + + match coordinator_receive.recv().unwrap() { + // Save the token locally and the next turn of the loop will use + // this to spawn a new unit of work, or it may get dropped + // immediately if we have no more work to spawn. + Message::Token(token) => match token { + Ok(token) => { + tokens.push(token); + } + Err(e) => { + let msg = &format!("failed to acquire jobserver token: {e}"); + cgcx.diag_emitter.fatal(msg); + codegen_aborted = Some(FatalError); + } + }, + + Message::CodegenDone { .. } + | Message::CodegenComplete + | Message::CodegenAborted + | Message::AddImportOnlyModule { .. } => { + unreachable!() + } + + Message::WorkItem { result } => { + // If a thread exits successfully then we drop a token associated + // with that worker and update our `used_token_count` count. + // We may later re-acquire a token to continue running more work. + // We may also not actually drop a token here if the worker was + // running with an "ephemeral token". + used_token_count -= 1; + + match result { + Ok(WorkItemResult::Finished(compiled_module)) => { + compiled_modules.push(compiled_module); + } + Ok(WorkItemResult::NeedsFatLto(_)) | Ok(WorkItemResult::NeedsThinLto(_, _)) => { + unreachable!() + } + Err(Some(WorkerFatalError)) => { + // Like `CodegenAborted`, wait for remaining work to finish. + codegen_aborted = Some(FatalError); + } + Err(None) => { + // If the thread failed that means it panicked, so + // we abort immediately. + bug!("worker thread panicked"); + } + } + } + } + } + + if let Some(codegen_aborted) = codegen_aborted { + codegen_aborted.raise(); + } + + compiled_modules +} + fn execute_thin_lto_work_item( cgcx: &CodegenContext, module: lto::ThinModule, @@ -1085,9 +1234,8 @@ fn start_executing_work( regular_config: Arc, allocator_config: Arc, allocator_module: Option>, - tx_to_llvm_workers: Sender>, + coordinator_send: Sender>, ) -> thread::JoinHandle> { - let coordinator_send = tx_to_llvm_workers; let sess = tcx.sess; let mut each_linked_rlib_for_lto = Vec::new(); @@ -1307,7 +1455,6 @@ fn start_executing_work( let mut needs_fat_lto = Vec::new(); let mut needs_thin_lto = Vec::new(); let mut lto_import_only_modules = Vec::new(); - let mut started_lto = false; /// Possible state transitions: /// - Ongoing -> Completed @@ -1397,48 +1544,8 @@ fn start_executing_work( if running_with_any_token(main_thread_state, running_with_own_token) == 0 && work_items.is_empty() { - // All codegen work is done. Do we have LTO work to do? - if needs_fat_lto.is_empty() - && needs_thin_lto.is_empty() - && lto_import_only_modules.is_empty() - { - // Nothing more to do! - break; - } - - // We have LTO work to do. Perform the serial work here of - // figuring out what we're going to LTO and then push a - // bunch of work items onto our queue to do LTO. This all - // happens on the coordinator thread but it's very quick so - // we don't worry about tokens. - assert!(!started_lto); - started_lto = true; - - if !needs_fat_lto.is_empty() { - // We're doing fat LTO outside of the main loop. - break; - } - - check_lto_allowed(&cgcx); - - let needs_thin_lto = mem::take(&mut needs_thin_lto); - let import_only_modules = mem::take(&mut lto_import_only_modules); - - for (work, cost) in generate_thin_lto_work( - &cgcx, - &exported_symbols_for_lto, - &each_linked_rlib_file_for_lto, - needs_thin_lto, - import_only_modules, - ) { - let insertion_index = work_items - .binary_search_by_key(&cost, |&(_, cost)| cost) - .unwrap_or_else(|e| e); - work_items.insert(insertion_index, (work, cost)); - if cgcx.parallel { - helper.request_token(); - } - } + // All codegen work is done. + break; } // In this branch, we know that everything has been codegened, @@ -1576,12 +1683,10 @@ fn start_executing_work( compiled_modules.push(compiled_module); } Ok(WorkItemResult::NeedsFatLto(fat_lto_input)) => { - assert!(!started_lto); assert!(needs_thin_lto.is_empty()); needs_fat_lto.push(fat_lto_input); } Ok(WorkItemResult::NeedsThinLto(name, thin_buffer)) => { - assert!(!started_lto); assert!(needs_fat_lto.is_empty()); needs_thin_lto.push((name, thin_buffer)); } @@ -1598,7 +1703,6 @@ fn start_executing_work( } Message::AddImportOnlyModule { module_data, work_product } => { - assert!(!started_lto); assert_eq!(codegen_state, Ongoing); assert_eq!(main_thread_state, MainThreadState::Codegenning); lto_import_only_modules.push((module_data, work_product)); @@ -1614,6 +1718,7 @@ fn start_executing_work( drop(codegen_state); drop(tokens); drop(helper); + assert!(work_items.is_empty()); if !needs_fat_lto.is_empty() { assert!(compiled_modules.is_empty()); @@ -1628,6 +1733,18 @@ fn start_executing_work( lto_import_only_modules, ); compiled_modules.push(module); + } else if !needs_thin_lto.is_empty() || !lto_import_only_modules.is_empty() { + assert!(compiled_modules.is_empty()); + assert!(needs_fat_lto.is_empty()); + + compiled_modules.extend(do_thin_lto( + &cgcx, + &mut llvm_start_time, + exported_symbols_for_lto, + each_linked_rlib_file_for_lto, + needs_thin_lto, + lto_import_only_modules, + )); } // Drop to print timings From 53867f23b2f332413ed2a7514a44ee40330e6467 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Thu, 31 Jul 2025 10:38:42 +0000 Subject: [PATCH 4/5] Separate thin LTO message and work item types --- compiler/rustc_codegen_ssa/src/back/write.rs | 109 ++++++++++++++----- 1 file changed, 80 insertions(+), 29 deletions(-) diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index 262878d6707ee..7f663df1d148b 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -380,7 +380,7 @@ fn generate_thin_lto_work( each_linked_rlib_for_lto: &[PathBuf], needs_thin_lto: Vec<(String, B::ThinBuffer)>, import_only_modules: Vec<(SerializedModule, WorkProduct)>, -) -> Vec<(WorkItem, u64)> { +) -> Vec<(ThinLtoWorkItem, u64)> { let _prof_timer = cgcx.prof.generic_activity("codegen_thin_generate_lto_work"); let (lto_modules, copy_jobs) = B::run_thin_lto( @@ -394,11 +394,11 @@ fn generate_thin_lto_work( .into_iter() .map(|module| { let cost = module.cost(); - (WorkItem::ThinLto(module), cost) + (ThinLtoWorkItem::ThinLto(module), cost) }) .chain(copy_jobs.into_iter().map(|wp| { ( - WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen { + ThinLtoWorkItem::CopyPostLtoArtifacts(CachedModuleCodegen { name: wp.cgu_name.clone(), source: wp, }), @@ -703,6 +703,12 @@ pub(crate) enum WorkItem { /// Copy the post-LTO artifacts from the incremental cache to the output /// directory. CopyPostLtoArtifacts(CachedModuleCodegen), +} + +enum ThinLtoWorkItem { + /// Copy the post-LTO artifacts from the incremental cache to the output + /// directory. + CopyPostLtoArtifacts(CachedModuleCodegen), /// Performs thin-LTO on the given module. ThinLto(lto::ThinModule), } @@ -752,7 +758,18 @@ impl WorkItem { match self { WorkItem::Optimize(m) => desc("opt", "optimize module", &m.name), WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for", &m.name), - WorkItem::ThinLto(m) => desc("lto", "thin-LTO module", m.name()), + } + } +} + +impl ThinLtoWorkItem { + /// Generate a short description of this work item suitable for use as a thread name. + fn short_description(&self) -> String { + match self { + ThinLtoWorkItem::CopyPostLtoArtifacts(m) => { + desc("cpy", "copy LTO artifacts for", &m.name) + } + ThinLtoWorkItem::ThinLto(m) => desc("lto", "thin-LTO module", m.name()), } } } @@ -883,7 +900,7 @@ fn execute_optimize_work_item( fn execute_copy_from_cache_work_item( cgcx: &CodegenContext, module: CachedModuleCodegen, -) -> WorkItemResult { +) -> CompiledModule { let _timer = cgcx .prof .generic_activity_with_arg("codegen_copy_artifacts_from_incr_cache", &*module.name); @@ -956,7 +973,7 @@ fn execute_copy_from_cache_work_item( cgcx.create_dcx().handle().emit_fatal(errors::NoSavedObjectFile { cgu_name: &module.name }) } - WorkItemResult::Finished(CompiledModule { + CompiledModule { links_from_incr_cache, kind: ModuleKind::Regular, name: module.name, @@ -965,7 +982,7 @@ fn execute_copy_from_cache_work_item( bytecode, assembly, llvm_ir, - }) + } } fn do_fat_lto( @@ -1015,7 +1032,7 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>( let coordinator_send2 = coordinator_send.clone(); let helper = jobserver::client() .into_helper_thread(move |token| { - drop(coordinator_send2.send(Message::Token::(token))); + drop(coordinator_send2.send(ThinLtoMessage::Token(token))); }) .expect("failed to spawn helper thread"); @@ -1069,7 +1086,7 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>( while used_token_count < tokens.len() + 1 && let Some((item, _)) = work_items.pop() { - spawn_work(&cgcx, coordinator_send.clone(), llvm_start_time, item); + spawn_thin_lto_work(&cgcx, coordinator_send.clone(), llvm_start_time, item); used_token_count += 1; } } else { @@ -1087,7 +1104,7 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>( // Save the token locally and the next turn of the loop will use // this to spawn a new unit of work, or it may get dropped // immediately if we have no more work to spawn. - Message::Token(token) => match token { + ThinLtoMessage::Token(token) => match token { Ok(token) => { tokens.push(token); } @@ -1098,14 +1115,7 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>( } }, - Message::CodegenDone { .. } - | Message::CodegenComplete - | Message::CodegenAborted - | Message::AddImportOnlyModule { .. } => { - unreachable!() - } - - Message::WorkItem { result } => { + ThinLtoMessage::WorkItem { result } => { // If a thread exits successfully then we drop a token associated // with that worker and update our `used_token_count` count. // We may later re-acquire a token to continue running more work. @@ -1114,12 +1124,7 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>( used_token_count -= 1; match result { - Ok(WorkItemResult::Finished(compiled_module)) => { - compiled_modules.push(compiled_module); - } - Ok(WorkItemResult::NeedsFatLto(_)) | Ok(WorkItemResult::NeedsThinLto(_, _)) => { - unreachable!() - } + Ok(compiled_module) => compiled_modules.push(compiled_module), Err(Some(WorkerFatalError)) => { // Like `CodegenAborted`, wait for remaining work to finish. codegen_aborted = Some(FatalError); @@ -1144,12 +1149,11 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>( fn execute_thin_lto_work_item( cgcx: &CodegenContext, module: lto::ThinModule, -) -> WorkItemResult { +) -> CompiledModule { let _timer = cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", module.name()); let module = B::optimize_thin(cgcx, module); - let module = B::codegen(cgcx, module, &cgcx.module_config); - WorkItemResult::Finished(module) + B::codegen(cgcx, module, &cgcx.module_config) } /// Messages sent to the coordinator. @@ -1183,6 +1187,17 @@ pub(crate) enum Message { CodegenAborted, } +/// Messages sent to the coordinator. +pub(crate) enum ThinLtoMessage { + /// A jobserver token has become available. Sent from the jobserver helper + /// thread. + Token(io::Result), + + /// The backend has finished processing a work item for a codegen unit. + /// Sent from a backend worker thread. + WorkItem { result: Result> }, +} + /// A message sent from the coordinator thread to the main thread telling it to /// process another codegen unit. pub struct CguMessage; @@ -1839,8 +1854,9 @@ fn spawn_work<'a, B: ExtraBackendMethods>( B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || { let result = std::panic::catch_unwind(AssertUnwindSafe(|| match work { WorkItem::Optimize(m) => execute_optimize_work_item(&cgcx, m), - WorkItem::CopyPostLtoArtifacts(m) => execute_copy_from_cache_work_item(&cgcx, m), - WorkItem::ThinLto(m) => execute_thin_lto_work_item(&cgcx, m), + WorkItem::CopyPostLtoArtifacts(m) => { + WorkItemResult::Finished(execute_copy_from_cache_work_item(&cgcx, m)) + } })); let msg = match result { @@ -1860,6 +1876,41 @@ fn spawn_work<'a, B: ExtraBackendMethods>( .expect("failed to spawn work thread"); } +fn spawn_thin_lto_work<'a, B: ExtraBackendMethods>( + cgcx: &'a CodegenContext, + coordinator_send: Sender, + llvm_start_time: &mut Option>, + work: ThinLtoWorkItem, +) { + if llvm_start_time.is_none() { + *llvm_start_time = Some(cgcx.prof.verbose_generic_activity("LLVM_passes")); + } + + let cgcx = cgcx.clone(); + + B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || { + let result = std::panic::catch_unwind(AssertUnwindSafe(|| match work { + ThinLtoWorkItem::CopyPostLtoArtifacts(m) => execute_copy_from_cache_work_item(&cgcx, m), + ThinLtoWorkItem::ThinLto(m) => execute_thin_lto_work_item(&cgcx, m), + })); + + let msg = match result { + Ok(result) => ThinLtoMessage::WorkItem { result: Ok(result) }, + + // We ignore any `FatalError` coming out of `execute_work_item`, as a + // diagnostic was already sent off to the main thread - just surface + // that there was an error in this worker. + Err(err) if err.is::() => { + ThinLtoMessage::WorkItem { result: Err(Some(WorkerFatalError)) } + } + + Err(_) => ThinLtoMessage::WorkItem { result: Err(None) }, + }; + drop(coordinator_send.send(msg)); + }) + .expect("failed to spawn work thread"); +} + enum SharedEmitterMessage { Diagnostic(Diagnostic), InlineAsmError(SpanData, String, Level, Option<(String, Vec)>), From a077dbd686809016bd143b66ce73f768725b6431 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Thu, 4 Sep 2025 13:08:33 +0000 Subject: [PATCH 5/5] Better timers for LTO --- compiler/rustc_codegen_ssa/src/back/write.rs | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index 7f663df1d148b..af1dd99a8e146 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -992,7 +992,7 @@ fn do_fat_lto( mut needs_fat_lto: Vec>, import_only_modules: Vec<(SerializedModule, WorkProduct)>, ) -> CompiledModule { - let _timer = cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", "everything"); + let _timer = cgcx.prof.verbose_generic_activity("LLVM_fatlto"); check_lto_allowed(&cgcx); @@ -1011,7 +1011,6 @@ fn do_fat_lto( fn do_thin_lto<'a, B: ExtraBackendMethods>( cgcx: &'a CodegenContext, - llvm_start_time: &mut Option>, exported_symbols_for_lto: Arc>, each_linked_rlib_for_lto: Vec, needs_thin_lto: Vec<(String, ::ThinBuffer)>, @@ -1020,6 +1019,8 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>( WorkProduct, )>, ) -> Vec { + let _timer = cgcx.prof.verbose_generic_activity("LLVM_thinlto"); + check_lto_allowed(&cgcx); let (coordinator_send, coordinator_receive) = channel(); @@ -1086,7 +1087,7 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>( while used_token_count < tokens.len() + 1 && let Some((item, _)) = work_items.pop() { - spawn_thin_lto_work(&cgcx, coordinator_send.clone(), llvm_start_time, item); + spawn_thin_lto_work(&cgcx, coordinator_send.clone(), item); used_token_count += 1; } } else { @@ -1726,6 +1727,9 @@ fn start_executing_work( } } + // Drop to print timings + drop(llvm_start_time); + if codegen_state == Aborted { return Err(()); } @@ -1754,7 +1758,6 @@ fn start_executing_work( compiled_modules.extend(do_thin_lto( &cgcx, - &mut llvm_start_time, exported_symbols_for_lto, each_linked_rlib_file_for_lto, needs_thin_lto, @@ -1762,9 +1765,6 @@ fn start_executing_work( )); } - // Drop to print timings - drop(llvm_start_time); - // Regardless of what order these modules completed in, report them to // the backend in the same order every time to ensure that we're handing // out deterministic results. @@ -1879,13 +1879,8 @@ fn spawn_work<'a, B: ExtraBackendMethods>( fn spawn_thin_lto_work<'a, B: ExtraBackendMethods>( cgcx: &'a CodegenContext, coordinator_send: Sender, - llvm_start_time: &mut Option>, work: ThinLtoWorkItem, ) { - if llvm_start_time.is_none() { - *llvm_start_time = Some(cgcx.prof.verbose_generic_activity("LLVM_passes")); - } - let cgcx = cgcx.clone(); B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || {