From b1d5922285b41259bd4a4b1b788ec679f1781ea9 Mon Sep 17 00:00:00 2001
From: bjorn3 <17426603+bjorn3@users.noreply.github.com>
Date: Thu, 4 Sep 2025 09:36:27 +0000
Subject: [PATCH 1/5] Move desc out of WorkItem::short_description to allow
 reusing in a future commit

---
 compiler/rustc_codegen_ssa/src/back/write.rs | 78 ++++++++++----------
 1 file changed, 39 insertions(+), 39 deletions(-)
diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index cbaf67d734547..931dc96aaa8e5 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -714,48 +714,48 @@ pub(crate) enum WorkItem<B: WriteBackendMethods> {
     ThinLto(lto::ThinModule<B>),
 }
 
+// Non-Windows thread-name builder: `short` label + (possibly trimmed) CGU name; `_long` unused.
+// Thread names set via `pthread_setname_np()` on *nix keep at most the first 15 bytes,
+// so the short label is used to maximize the space available for the module name.
+#[cfg(not(windows))]
+fn desc(short: &str, _long: &str, name: &str) -> String {
+    // The short label is three bytes, and is followed by a space. That
+    // leaves 11 bytes for the CGU name. How we obtain those 11 bytes
+    // depends on the CGU name form.
+    //
+    // - Non-incremental, e.g. `regex.f10ba03eb5ec7975-cgu.0`: the part
+    //   before the `-cgu.0` is the same for every CGU, so use the
+    //   `cgu.0` part. The number suffix will be different for each
+    //   CGU.
+    //
+    // - Incremental (normal), e.g. `2i52vvl2hco29us0`: use the whole
+    //   name because each CGU will have a unique ASCII hash, and the
+    //   first 11 bytes will be enough to identify it.
+    //
+    // - Incremental (with `-Zhuman-readable-cgu-names`), e.g.
+    //   `regex.f10ba03eb5ec7975-re_builder.volatile`: use the whole
+    //   name. The first 11 bytes won't be enough to uniquely identify
+    //   it, but no obvious substring will, and this is a rarely used
+    //   option so it doesn't matter much.
+    //
+    assert_eq!(short.len(), 3);
+    let name = if let Some(index) = name.find("-cgu.") {
+        &name[index + 1..] // +1 skips the leading '-'.
+    } else {
+        name
+    };
+    format!("{short} {name}")
+}
+
+// Windows has no thread name length limit, so use the more descriptive `long` label.
+#[cfg(windows)]
+fn desc(_short: &str, long: &str, name: &str) -> String {
+    format!("{long} {name}")
+}
+
 impl<B: WriteBackendMethods> WorkItem<B> {
     /// Generate a short description of this work item suitable for use as a thread name.
     fn short_description(&self) -> String {
-        // `pthread_setname()` on *nix ignores anything beyond the first 15
-        // bytes. Use short descriptions to maximize the space available for
-        // the module name.
-        #[cfg(not(windows))]
-        fn desc(short: &str, _long: &str, name: &str) -> String {
-            // The short label is three bytes, and is followed by a space. That
-            // leaves 11 bytes for the CGU name. How we obtain those 11 bytes
-            // depends on the CGU name form.
-            //
-            // - Non-incremental, e.g. `regex.f10ba03eb5ec7975-cgu.0`: the part
-            //   before the `-cgu.0` is the same for every CGU, so use the
-            //   `cgu.0` part. The number suffix will be different for each
-            //   CGU.
-            //
-            // - Incremental (normal), e.g. `2i52vvl2hco29us0`: use the whole
-            //   name because each CGU will have a unique ASCII hash, and the
-            //   first 11 bytes will be enough to identify it.
-            //
-            // - Incremental (with `-Zhuman-readable-cgu-names`), e.g.
-            //   `regex.f10ba03eb5ec7975-re_builder.volatile`: use the whole
-            //   name. The first 11 bytes won't be enough to uniquely identify
-            //   it, but no obvious substring will, and this is a rarely used
-            //   option so it doesn't matter much.
-            //
-            assert_eq!(short.len(), 3);
-            let name = if let Some(index) = name.find("-cgu.") {
-                &name[index + 1..] // +1 skips the leading '-'.
-            } else {
-                name
-            };
-            format!("{short} {name}")
-        }
-
-        // Windows has no thread name length limit, so use more descriptive names.
-        #[cfg(windows)]
-        fn desc(_short: &str, long: &str, name: &str) -> String {
-            format!("{long} {name}")
-        }
-
         match self {
             WorkItem::Optimize(m) => desc("opt", "optimize module", &m.name),
             WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for", &m.name),

From a6725ab7b339e23f8cd9a7c5240dc08b89a2eae8 Mon Sep 17 00:00:00 2001
From: bjorn3 <17426603+bjorn3@users.noreply.github.com>
Date: Sun, 6 Jul 2025 17:57:41 +0000
Subject: [PATCH 2/5] Move fat LTO out of the main coordinator loop

---
 compiler/rustc_codegen_ssa/src/back/write.rs | 99 +++++++++-----------
 1 file changed, 42 insertions(+), 57 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index 931dc96aaa8e5..b0076c72ccbd2 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -703,13 +703,6 @@ pub(crate) enum WorkItem<B: WriteBackendMethods> {
     /// Copy the post-LTO artifacts from the incremental cache to the output
     /// directory.
     CopyPostLtoArtifacts(CachedModuleCodegen),
-    /// Performs fat LTO on the given module.
-    FatLto {
-        exported_symbols_for_lto: Arc<Vec<String>>,
-        each_linked_rlib_for_lto: Vec<PathBuf>,
-        needs_fat_lto: Vec<FatLtoInput<B>>,
-        import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
-    },
     /// Performs thin-LTO on the given module.
     ThinLto(lto::ThinModule<B>),
 }
@@ -759,7 +752,6 @@ impl<B: WriteBackendMethods> WorkItem<B> {
         match self {
             WorkItem::Optimize(m) => desc("opt", "optimize module", &m.name),
             WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for", &m.name),
-            WorkItem::FatLto { .. } => desc("lto", "fat LTO module", "everything"),
             WorkItem::ThinLto(m) => desc("lto", "thin-LTO module", m.name()),
         }
     }
@@ -976,15 +968,17 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
     })
 }
 
-fn execute_fat_lto_work_item<B: ExtraBackendMethods>(
+fn do_fat_lto<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
     exported_symbols_for_lto: &[String],
     each_linked_rlib_for_lto: &[PathBuf],
     mut needs_fat_lto: Vec<FatLtoInput<B>>,
     import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
-) -> WorkItemResult<B> {
+) -> CompiledModule {
     let _timer = cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", "everything");
 
+    check_lto_allowed(&cgcx);
+
     for (module, wp) in import_only_modules {
         needs_fat_lto.push(FatLtoInput::Serialized { name: wp.cgu_name, buffer: module })
     }
@@ -995,8 +989,7 @@ fn execute_fat_lto_work_item<B: ExtraBackendMethods>(
         each_linked_rlib_for_lto,
         needs_fat_lto,
     );
-    let module = B::codegen(cgcx, module, &cgcx.module_config);
-    WorkItemResult::Finished(module)
+    B::codegen(cgcx, module, &cgcx.module_config)
 }
 
 fn execute_thin_lto_work_item<B: ExtraBackendMethods>(
@@ -1421,45 +1414,30 @@ fn start_executing_work<B: ExtraBackendMethods>(
                     assert!(!started_lto);
                     started_lto = true;
 
-                    let needs_fat_lto = mem::take(&mut needs_fat_lto);
-                    let needs_thin_lto = mem::take(&mut needs_thin_lto);
-                    let import_only_modules = mem::take(&mut lto_import_only_modules);
-                    let each_linked_rlib_file_for_lto =
-                        mem::take(&mut each_linked_rlib_file_for_lto);
+                    if !needs_fat_lto.is_empty() {
+                        // We're doing fat LTO outside of the main loop.
+                        break;
+                    }
 
                     check_lto_allowed(&cgcx);
 
-                    if !needs_fat_lto.is_empty() {
-                        assert!(needs_thin_lto.is_empty());
-
-                        work_items.push((
-                            WorkItem::FatLto {
-                                exported_symbols_for_lto: Arc::clone(&exported_symbols_for_lto),
-                                each_linked_rlib_for_lto: each_linked_rlib_file_for_lto,
-                                needs_fat_lto,
-                                import_only_modules,
-                            },
-                            0,
-                        ));
+                    let needs_thin_lto = mem::take(&mut needs_thin_lto);
+                    let import_only_modules = mem::take(&mut lto_import_only_modules);
+
+                    for (work, cost) in generate_thin_lto_work(
+                        &cgcx,
+                        &exported_symbols_for_lto,
+                        &each_linked_rlib_file_for_lto,
+                        needs_thin_lto,
+                        import_only_modules,
+                    ) {
+                        let insertion_index = work_items
+                            .binary_search_by_key(&cost, |&(_, cost)| cost)
+                            .unwrap_or_else(|e| e);
+                        work_items.insert(insertion_index, (work, cost));
                         if cgcx.parallel {
                             helper.request_token();
                         }
-                    } else {
-                        for (work, cost) in generate_thin_lto_work(
-                            &cgcx,
-                            &exported_symbols_for_lto,
-                            &each_linked_rlib_file_for_lto,
-                            needs_thin_lto,
-                            import_only_modules,
-                        ) {
-                            let insertion_index = work_items
-                                .binary_search_by_key(&cost, |&(_, cost)| cost)
-                                .unwrap_or_else(|e| e);
-                            work_items.insert(insertion_index, (work, cost));
-                            if cgcx.parallel {
-                                helper.request_token();
-                            }
-                        }
                     }
                 }
 
@@ -1633,6 +1611,25 @@ fn start_executing_work<B: ExtraBackendMethods>(
             return Err(());
         }
 
+        drop(codegen_state);
+        drop(tokens);
+        drop(helper);
+
+        if !needs_fat_lto.is_empty() {
+            assert!(compiled_modules.is_empty());
+            assert!(needs_thin_lto.is_empty());
+
+            // This uses the implicit token
+            let module = do_fat_lto(
+                &cgcx,
+                &exported_symbols_for_lto,
+                &each_linked_rlib_file_for_lto,
+                needs_fat_lto,
+                lto_import_only_modules,
+            );
+            compiled_modules.push(module);
+        }
+
         // Drop to print timings
         drop(llvm_start_time);
 
@@ -1726,18 +1723,6 @@ fn spawn_work<'a, B: ExtraBackendMethods>(
         let result = std::panic::catch_unwind(AssertUnwindSafe(|| match work {
             WorkItem::Optimize(m) => execute_optimize_work_item(&cgcx, m),
             WorkItem::CopyPostLtoArtifacts(m) => execute_copy_from_cache_work_item(&cgcx, m),
-            WorkItem::FatLto {
-                exported_symbols_for_lto,
-                each_linked_rlib_for_lto,
-                needs_fat_lto,
-                import_only_modules,
-            } => execute_fat_lto_work_item(
-                &cgcx,
-                &exported_symbols_for_lto,
-                &each_linked_rlib_for_lto,
-                needs_fat_lto,
-                import_only_modules,
-            ),
             WorkItem::ThinLto(m) => execute_thin_lto_work_item(&cgcx, m),
         }));
 

From 3cf3ec667a656e144ca28fdf4f476ee27c94b3be Mon Sep 17 00:00:00 2001
From: bjorn3 <17426603+bjorn3@users.noreply.github.com>
Date: Wed, 9 Jul 2025 15:11:55 +0000
Subject: [PATCH 3/5] Move thin LTO out of the main loop too

---
 compiler/rustc_codegen_ssa/src/back/write.rs | 217 ++++++++++++++-----
 1 file changed, 167 insertions(+), 50 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index b0076c72ccbd2..262878d6707ee 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -15,8 +15,8 @@ use rustc_data_structures::profiling::{SelfProfilerRef, VerboseTimingGuard};
 use rustc_errors::emitter::Emitter;
 use rustc_errors::translation::Translator;
 use rustc_errors::{
-    Diag, DiagArgMap, DiagCtxt, DiagMessage, ErrCode, FatalErrorMarker, Level, MultiSpan, Style,
-    Suggestions,
+    Diag, DiagArgMap, DiagCtxt, DiagMessage, ErrCode, FatalError, FatalErrorMarker, Level,
+    MultiSpan, Style, Suggestions,
 };
 use rustc_fs_util::link_or_copy;
 use rustc_incremental::{
@@ -992,6 +992,155 @@ fn do_fat_lto<B: ExtraBackendMethods>(
     B::codegen(cgcx, module, &cgcx.module_config)
 }
 
+fn do_thin_lto<'a, B: ExtraBackendMethods>(
+    cgcx: &'a CodegenContext<B>,
+    llvm_start_time: &mut Option<VerboseTimingGuard<'a>>,
+    exported_symbols_for_lto: Arc<Vec<String>>,
+    each_linked_rlib_for_lto: Vec<PathBuf>,
+    needs_thin_lto: Vec<(String, <B as WriteBackendMethods>::ThinBuffer)>,
+    lto_import_only_modules: Vec<(
+        SerializedModule<<B as WriteBackendMethods>::ModuleBuffer>,
+        WorkProduct,
+    )>,
+) -> Vec<CompiledModule> {
+    check_lto_allowed(&cgcx);
+
+    let (coordinator_send, coordinator_receive) = channel();
+
+    // First up, convert our jobserver client into a helper thread so that
+    // acquired tokens are delivered as ordinary messages on an mpsc channel.
+    // Every `request_token` call below eventually results in a token (or an
+    // acquisition error) arriving on `coordinator_receive`, where it is
+    // handled by the message loop further down.
+    let coordinator_send2 = coordinator_send.clone();
+    let helper = jobserver::client()
+        .into_helper_thread(move |token| {
+            drop(coordinator_send2.send(Message::Token::<B>(token)));
+        })
+        .expect("failed to spawn helper thread");
+
+    let mut work_items = vec![];
+
+    // We have LTO work to do. Perform the serial work here of figuring out
+    // what we're going to LTO, then queue one work item per job. The queue
+    // is kept sorted by ascending cost so that `pop()` in the loop below
+    // hands out the most expensive item first. This all happens on the
+    // coordinator thread but it's very quick so we don't worry about tokens.
+    for (work, cost) in generate_thin_lto_work(
+        cgcx,
+        &exported_symbols_for_lto,
+        &each_linked_rlib_for_lto,
+        needs_thin_lto,
+        lto_import_only_modules,
+    ) {
+        let insertion_index =
+            work_items.binary_search_by_key(&cost, |&(_, cost)| cost).unwrap_or_else(|e| e);
+        work_items.insert(insertion_index, (work, cost));
+        if cgcx.parallel {
+            helper.request_token();
+        }
+    }
+
+    let mut codegen_aborted = None;
+
+    // These are the Jobserver Tokens we currently hold. Does not include
+    // the implicit Token the compiler process owns no matter what.
+    let mut tokens = vec![];
+
+    // Amount of tokens that are used (including the implicit token).
+    let mut used_token_count = 0;
+
+    let mut compiled_modules = vec![];
+
+    // Run the message loop while there's still anything that needs message
+    // processing. Note that as soon as codegen is aborted we simply want to
+    // wait for all existing work to finish, so many of the conditions here
+    // only apply if codegen hasn't been aborted as they represent pending
+    // work to be done.
+    loop {
+        if codegen_aborted.is_none() {
+            if used_token_count == 0 && work_items.is_empty() {
+                // All thin LTO work is done.
+                break;
+            }
+
+            // Spin up what work we can, only doing this while we've got available
+            // parallelism slots and work left to spawn.
+            while used_token_count < tokens.len() + 1
+                && let Some((item, _)) = work_items.pop()
+            {
+                spawn_work(&cgcx, coordinator_send.clone(), llvm_start_time, item);
+                used_token_count += 1;
+            }
+        } else {
+            // Don't queue up any more work if codegen was aborted, we're
+            // just waiting for our existing children to finish.
+            if used_token_count == 0 {
+                break;
+            }
+        }
+
+        // Relinquish accidentally acquired extra tokens. Subtract 1 for the implicit token.
+        tokens.truncate(used_token_count.saturating_sub(1));
+
+        match coordinator_receive.recv().unwrap() {
+            // Save the token locally and the next turn of the loop will use
+            // this to spawn a new unit of work, or it may get dropped
+            // immediately if we have no more work to spawn.
+            Message::Token(token) => match token {
+                Ok(token) => {
+                    tokens.push(token);
+                }
+                Err(e) => {
+                    let msg = &format!("failed to acquire jobserver token: {e}");
+                    cgcx.diag_emitter.fatal(msg);
+                    codegen_aborted = Some(FatalError);
+                }
+            },
+
+            Message::CodegenDone { .. }
+            | Message::CodegenComplete
+            | Message::CodegenAborted
+            | Message::AddImportOnlyModule { .. } => {
+                unreachable!()
+            }
+
+            Message::WorkItem { result } => {
+                // If a thread exits successfully then we drop a token associated
+                // with that worker and update our `used_token_count` count.
+                // We may later re-acquire a token to continue running more work.
+                // We may also not actually drop a token here if the worker was
+                // running with an "ephemeral token".
+                used_token_count -= 1;
+
+                match result {
+                    Ok(WorkItemResult::Finished(compiled_module)) => {
+                        compiled_modules.push(compiled_module);
+                    }
+                    Ok(WorkItemResult::NeedsFatLto(_)) | Ok(WorkItemResult::NeedsThinLto(_, _)) => {
+                        unreachable!()
+                    }
+                    Err(Some(WorkerFatalError)) => {
+                        // Like `CodegenAborted`, wait for remaining work to finish.
+                        codegen_aborted = Some(FatalError);
+                    }
+                    Err(None) => {
+                        // If the thread failed that means it panicked, so
+                        // we abort immediately.
+                        bug!("worker thread panicked");
+                    }
+                }
+            }
+        }
+    }
+
+    if let Some(codegen_aborted) = codegen_aborted {
+        codegen_aborted.raise();
+    }
+
+    compiled_modules
+}
+
 fn execute_thin_lto_work_item<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
     module: lto::ThinModule<B>,
@@ -1085,9 +1234,8 @@ fn start_executing_work<B: ExtraBackendMethods>(
     regular_config: Arc<ModuleConfig>,
     allocator_config: Arc<ModuleConfig>,
     allocator_module: Option<ModuleCodegen<B::Module>>,
-    tx_to_llvm_workers: Sender<Message<B>>,
+    coordinator_send: Sender<Message<B>>,
 ) -> thread::JoinHandle<Result<CompiledModules, ()>> {
-    let coordinator_send = tx_to_llvm_workers;
     let sess = tcx.sess;
 
     let mut each_linked_rlib_for_lto = Vec::new();
@@ -1307,7 +1455,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
         let mut needs_fat_lto = Vec::new();
         let mut needs_thin_lto = Vec::new();
         let mut lto_import_only_modules = Vec::new();
-        let mut started_lto = false;
 
         /// Possible state transitions:
         /// - Ongoing -> Completed
@@ -1397,48 +1544,8 @@ fn start_executing_work<B: ExtraBackendMethods>(
                 if running_with_any_token(main_thread_state, running_with_own_token) == 0
                     && work_items.is_empty()
                 {
-                    // All codegen work is done. Do we have LTO work to do?
-                    if needs_fat_lto.is_empty()
-                        && needs_thin_lto.is_empty()
-                        && lto_import_only_modules.is_empty()
-                    {
-                        // Nothing more to do!
-                        break;
-                    }
-
-                    // We have LTO work to do. Perform the serial work here of
-                    // figuring out what we're going to LTO and then push a
-                    // bunch of work items onto our queue to do LTO. This all
-                    // happens on the coordinator thread but it's very quick so
-                    // we don't worry about tokens.
-                    assert!(!started_lto);
-                    started_lto = true;
-
-                    if !needs_fat_lto.is_empty() {
-                        // We're doing fat LTO outside of the main loop.
-                        break;
-                    }
-
-                    check_lto_allowed(&cgcx);
-
-                    let needs_thin_lto = mem::take(&mut needs_thin_lto);
-                    let import_only_modules = mem::take(&mut lto_import_only_modules);
-
-                    for (work, cost) in generate_thin_lto_work(
-                        &cgcx,
-                        &exported_symbols_for_lto,
-                        &each_linked_rlib_file_for_lto,
-                        needs_thin_lto,
-                        import_only_modules,
-                    ) {
-                        let insertion_index = work_items
-                            .binary_search_by_key(&cost, |&(_, cost)| cost)
-                            .unwrap_or_else(|e| e);
-                        work_items.insert(insertion_index, (work, cost));
-                        if cgcx.parallel {
-                            helper.request_token();
-                        }
-                    }
+                    // All codegen work is done.
+                    break;
                 }
 
                 // In this branch, we know that everything has been codegened,
@@ -1576,12 +1683,10 @@ fn start_executing_work<B: ExtraBackendMethods>(
                             compiled_modules.push(compiled_module);
                         }
                         Ok(WorkItemResult::NeedsFatLto(fat_lto_input)) => {
-                            assert!(!started_lto);
                             assert!(needs_thin_lto.is_empty());
                             needs_fat_lto.push(fat_lto_input);
                         }
                         Ok(WorkItemResult::NeedsThinLto(name, thin_buffer)) => {
-                            assert!(!started_lto);
                             assert!(needs_fat_lto.is_empty());
                             needs_thin_lto.push((name, thin_buffer));
                         }
@@ -1598,7 +1703,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
                 }
 
                 Message::AddImportOnlyModule { module_data, work_product } => {
-                    assert!(!started_lto);
                     assert_eq!(codegen_state, Ongoing);
                     assert_eq!(main_thread_state, MainThreadState::Codegenning);
                     lto_import_only_modules.push((module_data, work_product));
@@ -1614,6 +1718,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
         drop(codegen_state);
         drop(tokens);
         drop(helper);
+        assert!(work_items.is_empty());
 
         if !needs_fat_lto.is_empty() {
             assert!(compiled_modules.is_empty());
@@ -1628,6 +1733,18 @@ fn start_executing_work<B: ExtraBackendMethods>(
                 lto_import_only_modules,
             );
             compiled_modules.push(module);
+        } else if !needs_thin_lto.is_empty() || !lto_import_only_modules.is_empty() {
+            assert!(compiled_modules.is_empty());
+            assert!(needs_fat_lto.is_empty());
+
+            compiled_modules.extend(do_thin_lto(
+                &cgcx,
+                &mut llvm_start_time,
+                exported_symbols_for_lto,
+                each_linked_rlib_file_for_lto,
+                needs_thin_lto,
+                lto_import_only_modules,
+            ));
         }
 
         // Drop to print timings

From 53867f23b2f332413ed2a7514a44ee40330e6467 Mon Sep 17 00:00:00 2001
From: bjorn3 <17426603+bjorn3@users.noreply.github.com>
Date: Thu, 31 Jul 2025 10:38:42 +0000
Subject: [PATCH 4/5] Separate thin LTO message and work item types

---
 compiler/rustc_codegen_ssa/src/back/write.rs | 109 ++++++++++++++-----
 1 file changed, 80 insertions(+), 29 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index 262878d6707ee..7f663df1d148b 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -380,7 +380,7 @@ fn generate_thin_lto_work<B: ExtraBackendMethods>(
     each_linked_rlib_for_lto: &[PathBuf],
     needs_thin_lto: Vec<(String, B::ThinBuffer)>,
     import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
-) -> Vec<(WorkItem<B>, u64)> {
+) -> Vec<(ThinLtoWorkItem<B>, u64)> {
     let _prof_timer = cgcx.prof.generic_activity("codegen_thin_generate_lto_work");
 
     let (lto_modules, copy_jobs) = B::run_thin_lto(
@@ -394,11 +394,11 @@ fn generate_thin_lto_work<B: ExtraBackendMethods>(
         .into_iter()
         .map(|module| {
             let cost = module.cost();
-            (WorkItem::ThinLto(module), cost)
+            (ThinLtoWorkItem::ThinLto(module), cost)
         })
         .chain(copy_jobs.into_iter().map(|wp| {
             (
-                WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
+                ThinLtoWorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
                     name: wp.cgu_name.clone(),
                     source: wp,
                 }),
@@ -703,6 +703,12 @@ pub(crate) enum WorkItem<B: WriteBackendMethods> {
     /// Copy the post-LTO artifacts from the incremental cache to the output
     /// directory.
     CopyPostLtoArtifacts(CachedModuleCodegen),
+}
+
+enum ThinLtoWorkItem<B: WriteBackendMethods> {
+    /// Copy the post-LTO artifacts from the incremental cache to the output
+    /// directory.
+    CopyPostLtoArtifacts(CachedModuleCodegen),
     /// Performs thin-LTO on the given module.
     ThinLto(lto::ThinModule<B>),
 }
@@ -752,7 +758,18 @@ impl<B: WriteBackendMethods> WorkItem<B> {
         match self {
             WorkItem::Optimize(m) => desc("opt", "optimize module", &m.name),
             WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for", &m.name),
-            WorkItem::ThinLto(m) => desc("lto", "thin-LTO module", m.name()),
+        }
+    }
+}
+
+impl<B: WriteBackendMethods> ThinLtoWorkItem<B> {
+    /// Generate a short description of this work item suitable for use as a thread name.
+    fn short_description(&self) -> String {
+        match self {
+            ThinLtoWorkItem::CopyPostLtoArtifacts(m) => {
+                desc("cpy", "copy LTO artifacts for", &m.name)
+            }
+            ThinLtoWorkItem::ThinLto(m) => desc("lto", "thin-LTO module", m.name()),
         }
     }
 }
@@ -883,7 +900,7 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
 fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
     module: CachedModuleCodegen,
-) -> WorkItemResult<B> {
+) -> CompiledModule {
     let _timer = cgcx
         .prof
         .generic_activity_with_arg("codegen_copy_artifacts_from_incr_cache", &*module.name);
@@ -956,7 +973,7 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
         cgcx.create_dcx().handle().emit_fatal(errors::NoSavedObjectFile { cgu_name: &module.name })
     }
 
-    WorkItemResult::Finished(CompiledModule {
+    CompiledModule {
         links_from_incr_cache,
         kind: ModuleKind::Regular,
         name: module.name,
@@ -965,7 +982,7 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
         bytecode,
         assembly,
         llvm_ir,
-    })
+    }
 }
 
 fn do_fat_lto<B: ExtraBackendMethods>(
@@ -1015,7 +1032,7 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>(
     let coordinator_send2 = coordinator_send.clone();
     let helper = jobserver::client()
         .into_helper_thread(move |token| {
-            drop(coordinator_send2.send(Message::Token::<B>(token)));
+            drop(coordinator_send2.send(ThinLtoMessage::Token(token)));
         })
         .expect("failed to spawn helper thread");
 
@@ -1069,7 +1086,7 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>(
             while used_token_count < tokens.len() + 1
                 && let Some((item, _)) = work_items.pop()
             {
-                spawn_work(&cgcx, coordinator_send.clone(), llvm_start_time, item);
+                spawn_thin_lto_work(&cgcx, coordinator_send.clone(), llvm_start_time, item);
                 used_token_count += 1;
             }
         } else {
@@ -1087,7 +1104,7 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>(
             // Save the token locally and the next turn of the loop will use
             // this to spawn a new unit of work, or it may get dropped
             // immediately if we have no more work to spawn.
-            Message::Token(token) => match token {
+            ThinLtoMessage::Token(token) => match token {
                 Ok(token) => {
                     tokens.push(token);
                 }
@@ -1098,14 +1115,7 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>(
                 }
             },
 
-            Message::CodegenDone { .. }
-            | Message::CodegenComplete
-            | Message::CodegenAborted
-            | Message::AddImportOnlyModule { .. } => {
-                unreachable!()
-            }
-
-            Message::WorkItem { result } => {
+            ThinLtoMessage::WorkItem { result } => {
                 // If a thread exits successfully then we drop a token associated
                 // with that worker and update our `used_token_count` count.
                 // We may later re-acquire a token to continue running more work.
@@ -1114,12 +1124,7 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>(
                 used_token_count -= 1;
 
                 match result {
-                    Ok(WorkItemResult::Finished(compiled_module)) => {
-                        compiled_modules.push(compiled_module);
-                    }
-                    Ok(WorkItemResult::NeedsFatLto(_)) | Ok(WorkItemResult::NeedsThinLto(_, _)) => {
-                        unreachable!()
-                    }
+                    Ok(compiled_module) => compiled_modules.push(compiled_module),
                     Err(Some(WorkerFatalError)) => {
                         // Like `CodegenAborted`, wait for remaining work to finish.
                         codegen_aborted = Some(FatalError);
@@ -1144,12 +1149,11 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>(
 fn execute_thin_lto_work_item<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
     module: lto::ThinModule<B>,
-) -> WorkItemResult<B> {
+) -> CompiledModule {
     let _timer = cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", module.name());
 
     let module = B::optimize_thin(cgcx, module);
-    let module = B::codegen(cgcx, module, &cgcx.module_config);
-    WorkItemResult::Finished(module)
+    B::codegen(cgcx, module, &cgcx.module_config)
 }
 
 /// Messages sent to the coordinator.
@@ -1183,6 +1187,17 @@ pub(crate) enum Message<B: WriteBackendMethods> {
     CodegenAborted,
 }
 
+/// Messages sent to the thin LTO coordinator (the loop in `do_thin_lto`).
+pub(crate) enum ThinLtoMessage {
+    /// A jobserver token has become available. Sent from the jobserver helper
+    /// thread.
+    Token(io::Result<Acquired>),
+
+    /// The backend has finished processing a work item for a codegen unit.
+    /// Sent from a backend worker thread.
+    WorkItem { result: Result<CompiledModule, Option<WorkerFatalError>> },
+}
+
 /// A message sent from the coordinator thread to the main thread telling it to
 /// process another codegen unit.
 pub struct CguMessage;
@@ -1839,8 +1854,9 @@ fn spawn_work<'a, B: ExtraBackendMethods>(
     B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || {
         let result = std::panic::catch_unwind(AssertUnwindSafe(|| match work {
             WorkItem::Optimize(m) => execute_optimize_work_item(&cgcx, m),
-            WorkItem::CopyPostLtoArtifacts(m) => execute_copy_from_cache_work_item(&cgcx, m),
-            WorkItem::ThinLto(m) => execute_thin_lto_work_item(&cgcx, m),
+            WorkItem::CopyPostLtoArtifacts(m) => {
+                WorkItemResult::Finished(execute_copy_from_cache_work_item(&cgcx, m))
+            }
         }));
 
         let msg = match result {
@@ -1860,6 +1876,41 @@ fn spawn_work<'a, B: ExtraBackendMethods>(
     .expect("failed to spawn work thread");
 }
 
+fn spawn_thin_lto_work<'a, B: ExtraBackendMethods>(
+    cgcx: &'a CodegenContext<B>,
+    coordinator_send: Sender<ThinLtoMessage>,
+    llvm_start_time: &mut Option<VerboseTimingGuard<'a>>,
+    work: ThinLtoWorkItem<B>,
+) {
+    if llvm_start_time.is_none() {
+        *llvm_start_time = Some(cgcx.prof.verbose_generic_activity("LLVM_passes"));
+    }
+
+    let cgcx = cgcx.clone();
+
+    B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || {
+        let result = std::panic::catch_unwind(AssertUnwindSafe(|| match work {
+            ThinLtoWorkItem::CopyPostLtoArtifacts(m) => execute_copy_from_cache_work_item(&cgcx, m),
+            ThinLtoWorkItem::ThinLto(m) => execute_thin_lto_work_item(&cgcx, m),
+        }));
+
+        let msg = match result {
+            Ok(result) => ThinLtoMessage::WorkItem { result: Ok(result) },
+
+            // We ignore any `FatalError` coming out of the work item above, as a
+            // diagnostic was already sent off to the main thread - just surface
+            // that there was an error in this worker.
+            Err(err) if err.is::<FatalErrorMarker>() => {
+                ThinLtoMessage::WorkItem { result: Err(Some(WorkerFatalError)) }
+            }
+
+            Err(_) => ThinLtoMessage::WorkItem { result: Err(None) },
+        };
+        drop(coordinator_send.send(msg));
+    })
+    .expect("failed to spawn work thread");
+}
+
 enum SharedEmitterMessage {
     Diagnostic(Diagnostic),
     InlineAsmError(SpanData, String, Level, Option<(String, Vec<InnerSpan>)>),

From a077dbd686809016bd143b66ce73f768725b6431 Mon Sep 17 00:00:00 2001
From: bjorn3 <17426603+bjorn3@users.noreply.github.com>
Date: Thu, 4 Sep 2025 13:08:33 +0000
Subject: [PATCH 5/5] Better timers for LTO

---
 compiler/rustc_codegen_ssa/src/back/write.rs | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index 7f663df1d148b..af1dd99a8e146 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -992,7 +992,7 @@ fn do_fat_lto<B: ExtraBackendMethods>(
     mut needs_fat_lto: Vec<FatLtoInput<B>>,
     import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
 ) -> CompiledModule {
-    let _timer = cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", "everything");
+    let _timer = cgcx.prof.verbose_generic_activity("LLVM_fatlto");
 
     check_lto_allowed(&cgcx);
 
@@ -1011,7 +1011,6 @@ fn do_fat_lto<B: ExtraBackendMethods>(
 
 fn do_thin_lto<'a, B: ExtraBackendMethods>(
     cgcx: &'a CodegenContext<B>,
-    llvm_start_time: &mut Option<VerboseTimingGuard<'a>>,
     exported_symbols_for_lto: Arc<Vec<String>>,
     each_linked_rlib_for_lto: Vec<PathBuf>,
     needs_thin_lto: Vec<(String, <B as WriteBackendMethods>::ThinBuffer)>,
@@ -1020,6 +1019,8 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>(
         WorkProduct,
     )>,
 ) -> Vec<CompiledModule> {
+    let _timer = cgcx.prof.verbose_generic_activity("LLVM_thinlto");
+
     check_lto_allowed(&cgcx);
 
     let (coordinator_send, coordinator_receive) = channel();
@@ -1086,7 +1087,7 @@ fn do_thin_lto<'a, B: ExtraBackendMethods>(
             while used_token_count < tokens.len() + 1
                 && let Some((item, _)) = work_items.pop()
             {
-                spawn_thin_lto_work(&cgcx, coordinator_send.clone(), llvm_start_time, item);
+                spawn_thin_lto_work(&cgcx, coordinator_send.clone(), item);
                 used_token_count += 1;
             }
         } else {
@@ -1726,6 +1727,9 @@ fn start_executing_work<B: ExtraBackendMethods>(
             }
         }
 
+        // Drop to print timings
+        drop(llvm_start_time);
+
         if codegen_state == Aborted {
             return Err(());
         }
@@ -1754,7 +1758,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
 
             compiled_modules.extend(do_thin_lto(
                 &cgcx,
-                &mut llvm_start_time,
                 exported_symbols_for_lto,
                 each_linked_rlib_file_for_lto,
                 needs_thin_lto,
@@ -1762,9 +1765,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
             ));
         }
 
-        // Drop to print timings
-        drop(llvm_start_time);
-
         // Regardless of what order these modules completed in, report them to
         // the backend in the same order every time to ensure that we're handing
         // out deterministic results.
@@ -1879,13 +1879,8 @@ fn spawn_work<'a, B: ExtraBackendMethods>(
 fn spawn_thin_lto_work<'a, B: ExtraBackendMethods>(
     cgcx: &'a CodegenContext<B>,
     coordinator_send: Sender<ThinLtoMessage>,
-    llvm_start_time: &mut Option<VerboseTimingGuard<'a>>,
     work: ThinLtoWorkItem<B>,
 ) {
-    if llvm_start_time.is_none() {
-        *llvm_start_time = Some(cgcx.prof.verbose_generic_activity("LLVM_passes"));
-    }
-
     let cgcx = cgcx.clone();
 
     B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || {