From 6d3b503c6dc86af09661a098b3e159156bc70cd9 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 17 Oct 2025 10:49:17 -0700 Subject: [PATCH 1/8] [support bundle] Refactor into tasks --- .../tasks/support_bundle_collector.rs | 682 ++++++++++++------ 1 file changed, 457 insertions(+), 225 deletions(-) diff --git a/nexus/src/app/background/tasks/support_bundle_collector.rs b/nexus/src/app/background/tasks/support_bundle_collector.rs index 8dc13e7ab42..ee2224edba8 100644 --- a/nexus/src/app/background/tasks/support_bundle_collector.rs +++ b/nexus/src/app/background/tasks/support_bundle_collector.rs @@ -59,12 +59,11 @@ use std::future::Future; use std::io::Write; use std::num::NonZeroU64; use std::sync::Arc; -use std::sync::atomic::{AtomicUsize, Ordering}; use tokio::io::AsyncReadExt; use tokio::io::AsyncSeekExt; use tokio::io::AsyncWriteExt; use tokio::io::SeekFrom; -use tokio_util::task::AbortOnDropHandle; +use tokio::sync::OnceCell; use tufaceous_artifact::ArtifactHash; use uuid::Uuid; use zip::ZipArchive; @@ -428,8 +427,6 @@ impl SupportBundleCollector { request: request.clone(), bundle: bundle.clone(), transfer_chunk_size: request.transfer_chunk_size, - host_ereports_collected: AtomicUsize::new(0), - sp_ereports_collected: AtomicUsize::new(0), }); let authz_bundle = authz_support_bundle_from_id(bundle.id.into()); @@ -475,8 +472,60 @@ struct BundleCollection { request: BundleRequest, bundle: SupportBundle, transfer_chunk_size: NonZeroU64, - host_ereports_collected: AtomicUsize, - sp_ereports_collected: AtomicUsize, +} + +type CollectionStepFn = Box< + dyn for<'b> FnOnce( + &'b Arc, + &'b Utf8Path, + ) + -> BoxFuture<'b, anyhow::Result> + + Send, +>; + +enum CollectionStepOutput { + HostEreports(SupportBundleEreportStatus), + SpEreports(SupportBundleEreportStatus), + SavingSpDumps { listed_sps: bool }, + // NOTE: The ditinction between this and "Spawn" is pretty artificial - + // it's just to preserve a part of the report which says "we tried to + // list 
in-service sleds". + // + // If we changed the collection report, this could easily be combined + // with the "Spawn" variant. + SpawnSleds { extra_steps: Vec<(&'static str, CollectionStepFn)> }, + Spawn { extra_steps: Vec<(&'static str, CollectionStepFn)> }, + None, +} + +impl CollectionStepOutput { + // Updates the collection report based on the output of a collection step, + // and possibly extends the set of all steps to be executed. + fn process( + self, + report: &mut SupportBundleCollectionReport, + steps: &mut Vec<(&'static str, CollectionStepFn)>, + ) { + match self { + CollectionStepOutput::HostEreports(status) => { + report.host_ereports = status; + } + CollectionStepOutput::SpEreports(status) => { + report.sp_ereports = status; + } + CollectionStepOutput::SavingSpDumps { listed_sps } => { + report.listed_sps = listed_sps; + } + CollectionStepOutput::SpawnSleds { extra_steps } => { + report.listed_in_service_sleds = true; + steps.extend(extra_steps); + } + CollectionStepOutput::Spawn { extra_steps } => { + steps.extend(extra_steps); + } + CollectionStepOutput::None => (), + } + } } impl BundleCollection { @@ -656,37 +705,72 @@ impl BundleCollection { Ok(()) } - // Perform the work of collecting the support bundle into a temporary directory - // - // - "dir" is a directory where data can be stored. - // - "bundle" is metadata about the bundle being collected. - // - // If a partial bundle can be collected, it should be returned as - // an Ok(SupportBundleCollectionReport). Any failures from this function - // will prevent the support bundle from being collected altogether. - // - // NOTE: The background task infrastructure will periodically check to see - // if the bundle has been cancelled by a user while it is being collected. - // If that happens, this function will be CANCELLED at an await point. - // - // As a result, it is important that this function be implemented as - // cancel-safe. 
- async fn collect_bundle_as_file( + async fn run_collect_bundle_steps( self: &Arc, - dir: &Utf8TempDir, - ) -> anyhow::Result { - let log = &self.log; - - info!(&log, "Collecting bundle as local file"); + output: &Utf8TempDir, + mut steps: Vec<(&'static str, CollectionStepFn)>, + ) -> SupportBundleCollectionReport { let mut report = SupportBundleCollectionReport::new(self.bundle.id.into()); - tokio::fs::write( - dir.path().join("bundle_id.txt"), - self.bundle.id.to_string(), - ) - .await?; + const MAX_CONCURRENT_STEPS: usize = 16; + let mut tasks = + ParallelTaskSet::new_with_parallelism(MAX_CONCURRENT_STEPS); + + loop { + // Process all the currently-planned steps + while let Some((step_name, step)) = steps.pop() { + let previous_result = tasks.spawn({ + let collection = self.clone(); + let dir = output.path().to_path_buf(); + async move { + debug!(collection.log, "Running step"; "name" => &step_name); + step(&collection, dir.as_path()).await.inspect_err(|err| { + warn!( + collection.log, + "Step failed"; + "name" => &step_name, + InlineErrorChain::new(err.as_ref()), + ); + }) + } + }).await; + + if let Some(Ok(output)) = previous_result { + output.process(&mut report, &mut steps); + }; + } + + // If we've run out of tasks to spawn, join all the existing steps. + while let Some(previous_result) = tasks.join_next().await { + if let Ok(output) = previous_result { + output.process(&mut report, &mut steps); + }; + } + + // Executing steps may create additional steps, as follow-up work. + // + // Only finish if we've exhausted all possible steps and joined all spawned work. 
+ if steps.is_empty() { + return report; + } + } + } + async fn collect_bundle_id( + &self, + dir: &Utf8Path, + ) -> anyhow::Result { + tokio::fs::write(dir.join("bundle_id.txt"), self.bundle.id.to_string()) + .await?; + + Ok(CollectionStepOutput::None) + } + + async fn collect_reconfigurator_state( + &self, + dir: &Utf8Path, + ) -> anyhow::Result { // Collect reconfigurator state const NMAX_BLUEPRINTS: usize = 300; match reconfigurator_state_load( @@ -697,7 +781,7 @@ impl BundleCollection { .await { Ok(state) => { - let file_path = dir.path().join("reconfigurator_state.json"); + let file_path = dir.join("reconfigurator_state.json"); let file = std::fs::OpenOptions::new() .create(true) .write(true) @@ -713,7 +797,7 @@ impl BundleCollection { }, )?; info!( - log, + self.log, "Support bundle: collected reconfigurator state"; "target_blueprint" => ?state.target_blueprint, "num_blueprints" => state.blueprints.len(), @@ -722,152 +806,322 @@ impl BundleCollection { } Err(err) => { warn!( - log, + self.log, "Support bundle: failed to collect reconfigurator state"; "err" => ?err, ); } - } + }; + + Ok(CollectionStepOutput::None) + } - let ereport_collection = if let Some(ref ereport_filters) = - self.request.ereport_query + async fn collect_host_ereports( + self: &Arc, + dir: &Utf8Path, + ) -> anyhow::Result { + let Some(ref ereport_filters) = self.request.ereport_query else { + debug!(self.log, "Support bundle: ereports not requested"); + return Ok(CollectionStepOutput::None); + }; + let ereports_dir = dir.join("ereports"); + let status = match self + .save_host_ereports(ereport_filters.clone(), ereports_dir.clone()) + .await { - // If ereports are to be included in the bundle, have someone go do - // that in the background while we're gathering up other stuff. 
Note - // that the `JoinHandle`s for these tasks are wrapped in - // `AbortOnDropHandle`s for cancellation correctness; this ensures - // that if collecting the bundle is cancelled and this future is - // dropped, the tasks that we've spawned to collect ereports are - // aborted as well. - let dir = dir.path().join("ereports"); - let host = AbortOnDropHandle::new(tokio::spawn( - self.clone().collect_host_ereports( - ereport_filters.clone(), - dir.clone(), - ), - )); - let sp = AbortOnDropHandle::new(tokio::spawn( - self.clone().collect_sp_ereports(ereport_filters.clone(), dir), - )); - Some((host, sp)) - } else { - debug!(log, "Support bundle: ereports not requested"); - None + Ok(n_collected) => { + SupportBundleEreportStatus::Collected { n_collected } + } + Err((n_collected, err)) => { + warn!( + &self.log, + "Support bundle: host ereport collection failed \ + ({n_collected} collected successfully)"; + InlineErrorChain::new(err.as_ref()), + ); + + SupportBundleEreportStatus::Failed { + n_collected, + error: err.to_string(), + } + } }; - let all_sleds = self - .datastore - .sled_list_all_batched(&self.opctx, SledFilter::InService) - .await; + Ok(CollectionStepOutput::HostEreports(status)) + } - if let Ok(mgs_client) = self.create_mgs_client().await { - if let Err(e) = write_sled_info( - &self.log, - &mgs_client, - all_sleds.as_deref().ok(), - dir.path(), - ) + async fn collect_sp_ereports( + self: &Arc, + dir: &Utf8Path, + ) -> anyhow::Result { + let Some(ref ereport_filters) = self.request.ereport_query else { + debug!(self.log, "Support bundle: ereports not requested"); + return Ok(CollectionStepOutput::None); + }; + let ereports_dir = dir.join("ereports"); + let status = match self + .save_sp_ereports(ereport_filters.clone(), ereports_dir.clone()) .await - { - error!(log, "Failed to write sled_info.json"; "error" => InlineErrorChain::new(e.as_ref())); + { + Ok(n_collected) => { + SupportBundleEreportStatus::Collected { n_collected } } + Err((n_collected, err)) 
=> { + warn!( + &self.log, + "Support bundle: sp ereport collection failed \ + ({n_collected} collected successfully)"; + InlineErrorChain::new(err.as_ref()), + ); - let sp_dumps_dir = dir.path().join("sp_task_dumps"); - tokio::fs::create_dir_all(&sp_dumps_dir).await.with_context( - || { - format!( - "Failed to create SP task dump directory {sp_dumps_dir}" - ) - }, - )?; + SupportBundleEreportStatus::Failed { + n_collected, + error: err.to_string(), + } + } + }; - if let Err(e) = - save_all_sp_dumps(log, &mgs_client, &sp_dumps_dir).await - { - error!(log, "Failed to capture SP task dumps"; "error" => InlineErrorChain::new(e.as_ref())); - } else { - report.listed_sps = true; - }; - } else { - warn!(log, "No MGS client, skipping SP task dump collection"); - } + Ok(CollectionStepOutput::SpEreports(status)) + } - if let Ok(all_sleds) = all_sleds { - report.listed_in_service_sleds = true; + async fn get_or_initialize_mgs_client<'a>( + &self, + mgs_client: &'a OnceCell>>, + ) -> &'a Arc> { + mgs_client + .get_or_init(|| async { + Arc::new(self.create_mgs_client().await.ok()) + }) + .await + } - const MAX_CONCURRENT_SLED_REQUESTS: usize = 16; - const FAILURE_MESSAGE: &str = - "Failed to fully collect support bundle info from sled"; - let mut set = ParallelTaskSet::new_with_parallelism( - MAX_CONCURRENT_SLED_REQUESTS, + async fn get_or_initialize_all_sleds<'a>( + &self, + all_sleds: &'a OnceCell>>>, + ) -> &'a Arc>> { + all_sleds + .get_or_init(|| async { + Arc::new( + self.datastore + .sled_list_all_batched( + &self.opctx, + SledFilter::InService, + ) + .await + .ok(), + ) + }) + .await + } + + async fn collect_sled_cubby_info( + &self, + all_sleds: &OnceCell>>>, + mgs_client: &OnceCell>>, + dir: &Utf8Path, + ) -> anyhow::Result { + let Some(mgs_client) = + &**self.get_or_initialize_mgs_client(mgs_client).await + else { + warn!( + self.log, + "No MGS client, skipping sled cubby info collection" ); + return Ok(CollectionStepOutput::None); + }; + let nexus_sleds = self + 
.get_or_initialize_all_sleds(all_sleds) + .await + .as_deref() + .unwrap_or_default(); + + write_sled_cubby_info(&self.log, mgs_client, nexus_sleds, dir).await?; + + Ok(CollectionStepOutput::None) + } + + async fn spawn_sp_dump_collection( + &self, + mgs_client: &OnceCell>>, + dir: &Utf8Path, + ) -> anyhow::Result { + let Some(mgs_client) = + &**self.get_or_initialize_mgs_client(mgs_client).await + else { + warn!(self.log, "No MGS client, skipping SP task dump collection"); + return Ok(CollectionStepOutput::None); + }; - for sled in all_sleds { - let prev_result = set - .spawn({ - let collection: Arc = self.clone(); - let dir = dir.path().to_path_buf(); + let sp_dumps_dir = dir.join("sp_task_dumps"); + tokio::fs::create_dir_all(&sp_dumps_dir).await.with_context(|| { + format!("Failed to create SP task dump directory {sp_dumps_dir}") + })?; + + let mut extra_steps: Vec<(&'static str, CollectionStepFn)> = vec![]; + for sp in get_available_sps(&mgs_client).await? { + extra_steps.push(( + "sp dump", + Box::new({ + let mgs_client = mgs_client.clone(); + move |collection, dir| { async move { - collection.collect_data_from_sled(&sled, &dir).await + collection + .collect_sp_dump(&mgs_client, sp, dir) + .await } - }) - .await; - if let Some(Err(err)) = prev_result { - warn!(&self.log, "{FAILURE_MESSAGE}"; "err" => ?err); - } - } - while let Some(result) = set.join_next().await { - if let Err(err) = result { - warn!(&self.log, "{FAILURE_MESSAGE}"; "err" => ?err); - } - } + .boxed() + } + }), + )); } - if let Some((host, sp)) = ereport_collection { - let (host, sp) = tokio::join!(host, sp); - const TASK_FAILURE_MSG: &str = "task failed"; - let n_collected = - self.host_ereports_collected.load(Ordering::Acquire); - report.host_ereports = match host - .map_err(|e| anyhow::anyhow!("{TASK_FAILURE_MSG}: {e}")) - .and_then(|x| x) - { - Ok(_) => SupportBundleEreportStatus::Collected { n_collected }, - Err(err) => { - warn!( - &self.log, - "Support bundle: host ereport collection 
failed \ - ({n_collected} collected successfully)"; - "err" => ?err, - ); - SupportBundleEreportStatus::Failed { - n_collected, - error: err.to_string(), + Ok(CollectionStepOutput::Spawn { extra_steps }) + } + + async fn collect_sp_dump( + &self, + mgs_client: &MgsClient, + sp: SpIdentifier, + dir: &Utf8Path, + ) -> anyhow::Result { + save_sp_dumps(mgs_client, sp, dir) + .await + .with_context(|| format!("SP {} {}", sp.type_, sp.slot))?; + + Ok(CollectionStepOutput::SavingSpDumps { listed_sps: true }) + } + + // Perform the work of collecting the support bundle into a temporary directory + // + // - "dir" is a directory where data can be stored. + // - "bundle" is metadata about the bundle being collected. + // + // If a partial bundle can be collected, it should be returned as + // an Ok(SupportBundleCollectionReport). Any failures from this function + // will prevent the support bundle from being collected altogether. + // + // NOTE: The background task infrastructure will periodically check to see + // if the bundle has been cancelled by a user while it is being collected. + // If that happens, this function will be CANCELLED at an await point. + // + // As a result, it is important that this function be implemented as + // cancel-safe. 
+ async fn collect_bundle_as_file( + self: &Arc, + dir: &Utf8TempDir, + ) -> anyhow::Result { + let log = &self.log; + + info!(&log, "Collecting bundle as local file"); + + // Shared, lazy, fallible initialization for sleds + let all_sleds: OnceCell>>> = OnceCell::new(); + // Shared, lazy, fallible initialization for MGS client + let mgs_client: OnceCell>> = OnceCell::new(); + + let steps: Vec<(&str, CollectionStepFn)> = vec![ + ( + "bundle id", + Box::new(|collection, dir| { + collection.collect_bundle_id(dir).boxed() + }), + ), + ( + "reconfigurator state", + Box::new(|collection, dir| { + collection.collect_reconfigurator_state(dir).boxed() + }), + ), + ( + "host ereports", + Box::new(|collection, dir| { + collection.collect_host_ereports(dir).boxed() + }), + ), + ( + "sp ereports", + Box::new(|collection, dir| { + collection.collect_sp_ereports(dir).boxed() + }), + ), + ( + "sled cubby info", + Box::new({ + let all_sleds = all_sleds.clone(); + let mgs_client = mgs_client.clone(); + move |collection, dir| { + async move { + collection + .collect_sled_cubby_info( + &all_sleds, + &mgs_client, + dir, + ) + .await + } + .boxed() } - } - }; - let n_collected = - self.sp_ereports_collected.load(Ordering::Acquire); - report.sp_ereports = match sp - .map_err(|e| anyhow::anyhow!("{TASK_FAILURE_MSG}: {e}")) - .and_then(|x| x) - { - Ok(_) => SupportBundleEreportStatus::Collected { n_collected }, - Err(err) => { - warn!( - &self.log, - "Support bundle: SP ereport collection failed \ - ({n_collected} collected successfully)"; - "err" => ?err, - ); - SupportBundleEreportStatus::Failed { - n_collected, - error: err.to_string(), + }), + ), + ( + "spawn steps to query all sp dumps", + Box::new({ + let mgs_client = mgs_client.clone(); + move |collection, dir| { + async move { + collection + .spawn_sp_dump_collection(&mgs_client, dir) + .await + } + .boxed() } - } - }; + }), + ), + ( + "spawn steps to query all sleds", + Box::new({ + let all_sleds = all_sleds.clone(); + move 
|collection, _| { + async move { + collection.spawn_query_all_sleds(&all_sleds).await + } + .boxed() + } + }), + ), + ]; + + Ok(self.run_collect_bundle_steps(dir, steps).await) + } + + async fn spawn_query_all_sleds( + &self, + all_sleds: &OnceCell>>>, + ) -> anyhow::Result { + let Some(all_sleds) = + self.get_or_initialize_all_sleds(all_sleds).await.as_deref() + else { + warn!(self.log, "Could not read list of sleds"); + return Ok(CollectionStepOutput::None); + }; + + let mut extra_steps: Vec<(&'static str, CollectionStepFn)> = vec![]; + for sled in all_sleds { + extra_steps.push(( + "sled data", + Box::new({ + let sled = sled.clone(); + move |collection, dir| { + async move { + collection.collect_data_from_sled(&sled, dir).await + } + .boxed() + } + }), + )); } - Ok(report) + + return Ok(CollectionStepOutput::SpawnSleds { extra_steps }); } // Collect data from a sled, storing it into a directory that will @@ -880,7 +1134,7 @@ impl BundleCollection { &self, sled: &nexus_db_model::Sled, dir: &Utf8Path, - ) -> anyhow::Result<()> { + ) -> anyhow::Result { let log = &self.log; info!(&log, "Collecting bundle info from sled"; "sled" => %sled.id()); let sled_path = dir @@ -893,7 +1147,7 @@ impl BundleCollection { .await?; if self.request.skip_sled_info { - return Ok(()); + return Ok(CollectionStepOutput::None); } let Ok(sled_client) = nexus_networking::sled_client( @@ -909,7 +1163,7 @@ impl BundleCollection { "Could not contact sled", ) .await?; - return Ok(()); + return Ok(CollectionStepOutput::None); }; // NB: As new sled-diagnostic commands are added they should @@ -1014,14 +1268,15 @@ impl BundleCollection { error!(&self.log, "failed to write logs output: {e}"); } } - return Ok(()); + return Ok(CollectionStepOutput::None); } - async fn collect_sp_ereports( - self: Arc, + async fn save_host_ereports( + self: &Arc, filters: EreportFilters, dir: Utf8PathBuf, - ) -> anyhow::Result<()> { + ) -> Result { + let mut reports = 0; let mut paginator = Paginator::new( 
datastore::SQL_BATCH_SIZE, dropshot::PaginationOrder::Ascending, @@ -1029,40 +1284,50 @@ impl BundleCollection { while let Some(p) = paginator.next() { let ereports = self .datastore - .sp_ereports_fetch_matching( + .host_ereports_fetch_matching( &self.opctx, &filters, &p.current_pagparams(), ) .await .map_err(|e| { - e.internal_context("failed to query for SP ereports") + ( + reports, + e.internal_context( + "failed to query for host OS ereports", + ) + .into(), + ) })?; paginator = p.found_batch(&ereports, &|ereport| { (ereport.restart_id.into_untyped_uuid(), ereport.ena) }); - let n_ereports = ereports.len(); for ereport in ereports { - write_ereport(ereport.into(), &dir).await?; - self.sp_ereports_collected.fetch_add(1, Ordering::Release); + write_ereport(ereport.into(), &dir) + .await + .map_err(|e| (reports, e))?; + reports += 1; } - debug!(self.log, "Support bundle: added {n_ereports} SP ereports"); + debug!( + self.log, + "Support bundle: added {n_ereports} host OS ereports" + ); } info!( self.log, - "Support bundle: collected {} total SP ereports", - self.sp_ereports_collected.load(Ordering::Relaxed) + "Support bundle: collected {} total host ereports", reports ); - Ok(()) + Ok(reports) } - async fn collect_host_ereports( - self: Arc, + async fn save_sp_ereports( + self: &Arc, filters: EreportFilters, dir: Utf8PathBuf, - ) -> anyhow::Result<()> { + ) -> Result { + let mut reports = 0; let mut paginator = Paginator::new( datastore::SQL_BATCH_SIZE, dropshot::PaginationOrder::Ascending, @@ -1070,35 +1335,37 @@ impl BundleCollection { while let Some(p) = paginator.next() { let ereports = self .datastore - .host_ereports_fetch_matching( + .sp_ereports_fetch_matching( &self.opctx, &filters, &p.current_pagparams(), ) .await .map_err(|e| { - e.internal_context("failed to query for host OS ereports") + ( + reports, + e.internal_context("failed to query for SP ereports") + .into(), + ) })?; paginator = p.found_batch(&ereports, &|ereport| { 
(ereport.restart_id.into_untyped_uuid(), ereport.ena) }); let n_ereports = ereports.len(); for ereport in ereports { - write_ereport(ereport.into(), &dir).await?; - self.host_ereports_collected.fetch_add(1, Ordering::Release); + write_ereport(ereport.into(), &dir) + .await + .map_err(|e| (reports, e))?; + reports += 1; } - debug!( - self.log, - "Support bundle: added {n_ereports} host OS ereports" - ); + debug!(self.log, "Support bundle: added {n_ereports} SP ereports"); } info!( self.log, - "Support bundle: collected {} total host ereports", - self.host_ereports_collected.load(Ordering::Relaxed) + "Support bundle: collected {} total SP ereports", reports ); - Ok(()) + Ok(reports) } async fn create_mgs_client(&self) -> anyhow::Result { @@ -1396,40 +1663,6 @@ where Ok(()) } -/// Collect task dumps from all SPs via MGS and save them to a directory. -async fn save_all_sp_dumps( - log: &slog::Logger, - mgs_client: &MgsClient, - sp_dumps_dir: &Utf8Path, -) -> anyhow::Result<()> { - let available_sps = get_available_sps(&mgs_client).await?; - - let mut tasks = ParallelTaskSet::new(); - for sp in available_sps { - let mgs_client = mgs_client.clone(); - let sp_dumps_dir = sp_dumps_dir.to_owned(); - - tasks - .spawn(async move { - save_sp_dumps(mgs_client, sp, sp_dumps_dir) - .await - .with_context(|| format!("SP {} {}", sp.type_, sp.slot)) - }) - .await; - } - for result in tasks.join_all().await { - if let Err(e) = result { - error!( - log, - "failed to capture task dumps"; - "error" => InlineErrorChain::new(e.as_ref()) - ); - } - } - - Ok(()) -} - /// Use MGS ignition info to find active SPs. async fn get_available_sps( mgs_client: &MgsClient, @@ -1455,9 +1688,9 @@ async fn get_available_sps( /// Fetch and save task dumps from a single SP. 
async fn save_sp_dumps( - mgs_client: MgsClient, + mgs_client: &MgsClient, sp: SpIdentifier, - sp_dumps_dir: Utf8PathBuf, + sp_dumps_dir: &Utf8Path, ) -> anyhow::Result<()> { let dump_count = mgs_client .sp_task_dump_count(&sp.type_, sp.slot) @@ -1488,10 +1721,10 @@ async fn save_sp_dumps( /// Write a file with a JSON mapping of sled serial numbers to cubby and UUIDs for easier /// identification of sleds present in a bundle. -async fn write_sled_info( +async fn write_sled_cubby_info( log: &slog::Logger, mgs_client: &MgsClient, - nexus_sleds: Option<&[Sled]>, + nexus_sleds: &[Sled], dir: &Utf8Path, ) -> anyhow::Result<()> { #[derive(Serialize)] @@ -1506,7 +1739,6 @@ async fn write_sled_info( // We can still get a useful mapping of cubby to serial using just the data from MGS. let mut nexus_map: BTreeMap<_, _> = nexus_sleds - .unwrap_or_default() .into_iter() .map(|sled| (sled.serial_number(), sled)) .collect(); From 5abe573ba3ab32dd68dc74afb29691b9b6d4636e Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 17 Oct 2025 16:06:33 -0700 Subject: [PATCH 2/8] [support bundle] More structured data filtering --- .../tasks/support_bundle_collector.rs | 135 +++++++++++++++--- 1 file changed, 115 insertions(+), 20 deletions(-) diff --git a/nexus/src/app/background/tasks/support_bundle_collector.rs b/nexus/src/app/background/tasks/support_bundle_collector.rs index ee2224edba8..1e29e05b2d2 100644 --- a/nexus/src/app/background/tasks/support_bundle_collector.rs +++ b/nexus/src/app/background/tasks/support_bundle_collector.rs @@ -55,6 +55,7 @@ use serde_json::json; use sha2::{Digest, Sha256}; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; +use std::collections::HashSet; use std::future::Future; use std::io::Write; use std::num::NonZeroU64; @@ -82,26 +83,83 @@ fn authz_support_bundle_from_id(id: SupportBundleUuid) -> authz::SupportBundle { authz::SupportBundle::new(authz::FLEET, id, LookupType::by_id(id)) } +// Describes how support bundle data is 
selected. +// +// Multiple values of this enum are joined together into a HashSet. +// Categories should be additive. +#[derive(Debug, Clone, Hash, Eq, PartialEq)] +enum BundleDataCategory { + Reconfigurator, + HostInfo, + SledCubbyInfo, + SpDumps, +} + +// The set of sleds to include +#[derive(Debug, Clone, Hash, Eq, PartialEq)] +enum SledSelection { + All, + Specific(SledUuid), +} + // Specifies the data to be collected within the Support Bundle. #[derive(Clone)] struct BundleRequest { - // If "false": Skip collecting host-specific info from each sled. - skip_sled_info: bool, - // The size of chunks to use when transferring a bundle from Nexus // to a sled agent. // // Typically, this is CHUNK_SIZE, but can be modified for testing. transfer_chunk_size: NonZeroU64, + // The set of data to be included within this bundle. + data_selection: HashSet, + + // The set of sets to be included within this bundle. + // + // NOTE: This selection is only considered if "data_selection" requests + // data from specific sleds. + sled_selection: HashSet, + + // The set of ereports to be included within this bundle. + // + // "None" causes ereports to be skipped. 
ereport_query: Option, } +impl BundleRequest { + fn include_reconfigurator_data(&self) -> bool { + self.data_selection.contains(&BundleDataCategory::Reconfigurator) + } + + fn include_host_info(&self) -> bool { + self.data_selection.contains(&BundleDataCategory::HostInfo) + } + + fn include_sled(&self, id: SledUuid) -> bool { + self.sled_selection.contains(&SledSelection::Specific(id)) + || self.sled_selection.contains(&SledSelection::All) + } + + fn include_sled_cubby_info(&self) -> bool { + self.data_selection.contains(&BundleDataCategory::SledCubbyInfo) + } + + fn include_sp_dumps(&self) -> bool { + self.data_selection.contains(&BundleDataCategory::SpDumps) + } +} + impl Default for BundleRequest { fn default() -> Self { Self { - skip_sled_info: false, transfer_chunk_size: CHUNK_SIZE, + data_selection: HashSet::from([ + BundleDataCategory::Reconfigurator, + BundleDataCategory::HostInfo, + BundleDataCategory::SledCubbyInfo, + BundleDataCategory::SpDumps, + ]), + sled_selection: HashSet::from([SledSelection::All]), ereport_query: Some(EreportFilters { start_time: Some(chrono::Utc::now() - chrono::Days::new(7)), ..EreportFilters::default() @@ -771,6 +829,10 @@ impl BundleCollection { &self, dir: &Utf8Path, ) -> anyhow::Result { + if !self.request.include_reconfigurator_data() { + return Ok(CollectionStepOutput::None); + } + // Collect reconfigurator state const NMAX_BLUEPRINTS: usize = 300; match reconfigurator_state_load( @@ -920,6 +982,10 @@ impl BundleCollection { mgs_client: &OnceCell>>, dir: &Utf8Path, ) -> anyhow::Result { + if !self.request.include_sled_cubby_info() { + return Ok(CollectionStepOutput::None); + } + let Some(mgs_client) = &**self.get_or_initialize_mgs_client(mgs_client).await else { @@ -945,6 +1011,10 @@ impl BundleCollection { mgs_client: &OnceCell>>, dir: &Utf8Path, ) -> anyhow::Result { + if !self.request.include_sp_dumps() { + return Ok(CollectionStepOutput::None); + } + let Some(mgs_client) = 
&**self.get_or_initialize_mgs_client(mgs_client).await else { @@ -984,6 +1054,10 @@ impl BundleCollection { sp: SpIdentifier, dir: &Utf8Path, ) -> anyhow::Result { + if !self.request.include_sp_dumps() { + return Ok(CollectionStepOutput::None); + } + save_sp_dumps(mgs_client, sp, dir) .await .with_context(|| format!("SP {} {}", sp.type_, sp.slot))?; @@ -1098,6 +1172,10 @@ impl BundleCollection { &self, all_sleds: &OnceCell>>>, ) -> anyhow::Result { + if !self.request.include_host_info() { + return Ok(CollectionStepOutput::None); + } + let Some(all_sleds) = self.get_or_initialize_all_sleds(all_sleds).await.as_deref() else { @@ -1107,6 +1185,10 @@ impl BundleCollection { let mut extra_steps: Vec<(&'static str, CollectionStepFn)> = vec![]; for sled in all_sleds { + if !self.request.include_sled(sled.id()) { + continue; + } + extra_steps.push(( "sled data", Box::new({ @@ -1135,6 +1217,12 @@ impl BundleCollection { sled: &nexus_db_model::Sled, dir: &Utf8Path, ) -> anyhow::Result { + if !self.request.include_host_info() + || !self.request.include_sled(sled.id()) + { + return Ok(CollectionStepOutput::None); + } + let log = &self.log; info!(&log, "Collecting bundle info from sled"; "sled" => %sled.id()); let sled_path = dir @@ -1146,10 +1234,6 @@ impl BundleCollection { tokio::fs::write(sled_path.join("sled.txt"), format!("{sled:?}")) .await?; - if self.request.skip_sled_info { - return Ok(CollectionStepOutput::None); - } - let Ok(sled_client) = nexus_networking::sled_client( &self.datastore, &self.opctx, @@ -2266,7 +2350,7 @@ mod test { let request = BundleRequest { // NOTE: The support bundle querying interface isn't supported on // the simulated sled agent (yet?) so we're skipping this step. - skip_sled_info: true, + sled_selection: HashSet::new(), ..Default::default() }; let report = collector @@ -2340,9 +2424,10 @@ mod test { // We're going to use a really small chunk size here to force the bundle // to get split up. 
let request = BundleRequest { - skip_sled_info: true, transfer_chunk_size: NonZeroU64::new(16).unwrap(), + sled_selection: HashSet::new(), ereport_query: None, + ..Default::default() }; let report = collector @@ -2430,8 +2515,10 @@ mod test { ); // Each time we call "collect_bundle", we collect a SINGLE bundle. - let request = - BundleRequest { skip_sled_info: true, ..Default::default() }; + let request = BundleRequest { + sled_selection: HashSet::new(), + ..Default::default() + }; let report = collector .collect_bundle(&opctx, &request) .await @@ -2579,8 +2666,10 @@ mod test { false, nexus.id(), ); - let request = - BundleRequest { skip_sled_info: true, ..Default::default() }; + let request = BundleRequest { + sled_selection: HashSet::new(), + ..Default::default() + }; let report = collector .collect_bundle(&opctx, &request) .await @@ -2726,8 +2815,10 @@ mod test { false, nexus.id(), ); - let request = - BundleRequest { skip_sled_info: true, ..Default::default() }; + let request = BundleRequest { + sled_selection: HashSet::new(), + ..Default::default() + }; let report = collector .collect_bundle(&opctx, &request) .await @@ -2811,8 +2902,10 @@ mod test { false, nexus.id(), ); - let request = - BundleRequest { skip_sled_info: true, ..Default::default() }; + let request = BundleRequest { + sled_selection: HashSet::new(), + ..Default::default() + }; let report = collector .collect_bundle(&opctx, &request) .await @@ -2897,8 +2990,10 @@ mod test { ); // Collect the bundle - let request = - BundleRequest { skip_sled_info: true, ..Default::default() }; + let request = BundleRequest { + sled_selection: HashSet::new(), + ..Default::default() + }; let report = collector .collect_bundle(&opctx, &request) .await From 0cb9a6a303da78f39eedd1d4fd230ca264bb4371 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 1 Dec 2025 16:08:36 -0800 Subject: [PATCH 3/8] docs --- .../src/app/background/tasks/support_bundle_collector.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git 
a/nexus/src/app/background/tasks/support_bundle_collector.rs b/nexus/src/app/background/tasks/support_bundle_collector.rs index 9692db353b4..9a1cc86f909 100644 --- a/nexus/src/app/background/tasks/support_bundle_collector.rs +++ b/nexus/src/app/background/tasks/support_bundle_collector.rs @@ -90,13 +90,22 @@ fn authz_support_bundle_from_id(id: SupportBundleUuid) -> authz::SupportBundle { // Categories should be additive. #[derive(Debug, Clone, Hash, Eq, PartialEq)] enum BundleDataCategory { + // Collects reconfigurator state (some of the latest blueprints, + // information about the target blueprint). Reconfigurator, + // Collects info from sled agents, running a handful of + // diagnostic commands (e.g., zoneadm, dladm, etc). HostInfo, + // Collects sled serial numbers, cubby numbers, and UUIDs. SledCubbyInfo, + // Saves task dumps from SPs. SpDumps, } // The set of sleds to include +// +// Multiple values of this enum are joined together into a HashSet. +// Therefore "SledSelection::All" overrides specific sleds. #[derive(Debug, Clone, Hash, Eq, PartialEq)] enum SledSelection { All, From 015f2e971641b59692839fcc3ae03f4406316c7e Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 2 Dec 2025 11:00:20 -0800 Subject: [PATCH 4/8] meh --- nexus/src/app/background/tasks/support_bundle_collector.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/src/app/background/tasks/support_bundle_collector.rs b/nexus/src/app/background/tasks/support_bundle_collector.rs index 9a1cc86f909..0adc94f37e1 100644 --- a/nexus/src/app/background/tasks/support_bundle_collector.rs +++ b/nexus/src/app/background/tasks/support_bundle_collector.rs @@ -1095,7 +1095,7 @@ impl BundleCollection { // Perform the work of collecting the support bundle into a temporary directory // - // "dir" is a directory where data can be stored. + // "dir" is an output directory where data can be stored. 
// // If a partial bundle can be collected, it should be returned as // an Ok(SupportBundleCollectionReport). Any failures from this function From 92415e172d4436e920533d548d1b1fa1bea92db1 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 2 Dec 2025 11:24:46 -0800 Subject: [PATCH 5/8] Improve the support bundle report on a step-by-step basis --- dev-tools/omdb/src/bin/omdb/nexus.rs | 15 ++ .../tasks/support_bundle_collector.rs | 180 ++++++++++++------ .../integration_tests/support_bundles.rs | 43 ++--- nexus/types/src/internal_api/background.rs | 31 +++ 4 files changed, 186 insertions(+), 83 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 6a842a1d7e7..d8d6bab1bc7 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -98,6 +98,7 @@ use std::fs::OpenOptions; use std::os::unix::fs::PermissionsExt; use std::str::FromStr; use std::sync::Arc; +use std::time::Duration; use support_bundle_viewer::LocalFileAccess; use support_bundle_viewer::SupportBundleAccessor; use tabled::Tabled; @@ -2612,6 +2613,7 @@ fn print_task_support_bundle_collector(details: &serde_json::Value) { listed_in_service_sleds, listed_sps, activated_in_db_ok, + mut steps, ereports, }) = collection_report { @@ -2623,6 +2625,19 @@ fn print_task_support_bundle_collector(details: &serde_json::Value) { println!( " Bundle was able to list service processors: {listed_sps}" ); + + steps.sort_unstable_by_key(|s| s.start); + for step in steps { + let duration = (step.end - step.start) + .to_std() + .unwrap_or(Duration::from_millis(0)); + println!( + " Step {} ({}ms): {}", + step.name, + duration.as_millis(), + step.status + ); + } println!( " Bundle was activated in the database: {activated_in_db_ok}" ); diff --git a/nexus/src/app/background/tasks/support_bundle_collector.rs b/nexus/src/app/background/tasks/support_bundle_collector.rs index 0adc94f37e1..2b63ac6e93b 100644 --- 
a/nexus/src/app/background/tasks/support_bundle_collector.rs +++ b/nexus/src/app/background/tasks/support_bundle_collector.rs @@ -6,6 +6,7 @@ use crate::app::background::BackgroundTask; use anyhow::Context; +use anyhow::bail; use base64::Engine; use camino::Utf8DirEntry; use camino::Utf8Path; @@ -13,6 +14,8 @@ use camino::Utf8PathBuf; use camino_tempfile::Utf8TempDir; use camino_tempfile::tempdir_in; use camino_tempfile::tempfile_in; +use chrono::DateTime; +use chrono::Utc; use futures::FutureExt; use futures::StreamExt; use futures::future::BoxFuture; @@ -38,6 +41,8 @@ use nexus_types::fm::Ereport; use nexus_types::identity::Asset; use nexus_types::internal_api::background::SupportBundleCleanupReport; use nexus_types::internal_api::background::SupportBundleCollectionReport; +use nexus_types::internal_api::background::SupportBundleCollectionStep; +use nexus_types::internal_api::background::SupportBundleCollectionStepStatus; use nexus_types::internal_api::background::SupportBundleEreportStatus; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; @@ -563,47 +568,115 @@ type CollectionStepFn = Box< + Send, >; -enum CollectionStepOutput { - Ereports(SupportBundleEreportStatus), - SavingSpDumps { listed_sps: bool }, - // NOTE: The distinction between this and "Spawn" is pretty artificial - - // it's just to preserve a part of the report which says "we tried to - // list in-service sleds". - // - // If we changed the collection report, this could easily be combined - // with the "Spawn" variant. 
- SpawnSleds { extra_steps: Vec<(&'static str, CollectionStepFn)> }, - Spawn { extra_steps: Vec<(&'static str, CollectionStepFn)> }, - None, +struct CollectionStep { + name: String, + step_fn: CollectionStepFn, +} + +impl CollectionStep { + fn new(name: impl Into, step_fn: CollectionStepFn) -> Self { + Self { name: name.into(), step_fn } + } + + async fn run( + self, + collection: &Arc, + output: &Utf8Path, + ) -> CompletedCollectionStep { + let start = Utc::now(); + + let output = (self.step_fn)(collection, output) + .await + .inspect_err(|err| { + warn!( + collection.log, + "Step failed"; + "name" => &self.name, + InlineErrorChain::new(err.as_ref()), + ); + }) + .unwrap_or_else(|err| CollectionStepOutput::Failed(err)); + + let end = Utc::now(); + + CompletedCollectionStep { name: self.name, start, end, output } + } +} + +struct CompletedCollectionStep { + name: String, + start: DateTime, + end: DateTime, + output: CollectionStepOutput, } -impl CollectionStepOutput { +impl CompletedCollectionStep { // Updates the collection report based on the output of a collection step, // and possibly extends the set of all steps to be executed. 
fn process( self, report: &mut SupportBundleCollectionReport, - steps: &mut Vec<(&'static str, CollectionStepFn)>, + steps: &mut Vec, ) { - match self { + use SupportBundleCollectionStepStatus as Status; + + let status = match self.output { + CollectionStepOutput::Skipped => Status::Skipped, + CollectionStepOutput::Failed(err) => { + Status::Failed(err.to_string()) + } CollectionStepOutput::Ereports(status) => { report.ereports = Some(status); + Status::Ok } CollectionStepOutput::SavingSpDumps { listed_sps } => { report.listed_sps = listed_sps; + Status::Ok } CollectionStepOutput::SpawnSleds { extra_steps } => { report.listed_in_service_sleds = true; steps.extend(extra_steps); + Status::Ok } CollectionStepOutput::Spawn { extra_steps } => { steps.extend(extra_steps); + Status::Ok } - CollectionStepOutput::None => (), - } + CollectionStepOutput::None => Status::Ok, + }; + + // Add information about this completed step to the bundle report. + let step = SupportBundleCollectionStep { + name: self.name, + start: self.start, + end: self.end, + status, + }; + report.steps.push(step); } } +enum CollectionStepOutput { + // The step was intentionally not executed + Skipped, + // The step encountered a fatal error and could not complete. + // + // It may have still saved a partial set of data to the bundle. + Failed(anyhow::Error), + Ereports(SupportBundleEreportStatus), + SavingSpDumps { listed_sps: bool }, + // NOTE: The distinction between this and "Spawn" is pretty artificial - + // it's just to preserve a part of the report which says "we tried to + // list in-service sleds". + // + // If we changed the collection report, this could easily be combined + // with the "Spawn" variant. + SpawnSleds { extra_steps: Vec }, + Spawn { extra_steps: Vec }, + // The step completed with nothing to report, and no follow-up steps + None, +} + impl BundleCollection { // Collect the bundle within Nexus, and store it on a target sled. 
async fn collect_bundle_and_store_on_sled( @@ -856,7 +929,7 @@ impl BundleCollection { async fn run_collect_bundle_steps( self: &Arc, output: &Utf8TempDir, - mut steps: Vec<(&'static str, CollectionStepFn)>, + mut steps: Vec, ) -> SupportBundleCollectionReport { let mut report = SupportBundleCollectionReport::new(self.bundle.id.into()); @@ -867,34 +940,25 @@ impl BundleCollection { loop { // Process all the currently-planned steps - while let Some((step_name, step)) = steps.pop() { + while let Some(step) = steps.pop() { let previous_result = tasks.spawn({ let collection = self.clone(); let dir = output.path().to_path_buf(); async move { - debug!(collection.log, "Running step"; "name" => &step_name); - step(&collection, dir.as_path()).await.inspect_err(|err| { - warn!( - collection.log, - "Step failed"; - "name" => &step_name, - InlineErrorChain::new(err.as_ref()), - ); - }) + debug!(collection.log, "Running step"; "name" => &step.name); + step.run(&collection, dir.as_path()).await } }).await; - if let Some(Ok(output)) = previous_result { + if let Some(output) = previous_result { output.process(&mut report, &mut steps); }; } // If we've run out of tasks to spawn, join any of the previously // spawned tasks, if any exist. - if let Some(previous_result) = tasks.join_next().await { - if let Ok(output) = previous_result { - output.process(&mut report, &mut steps); - }; + if let Some(output) = tasks.join_next().await { + output.process(&mut report, &mut steps); // As soon as any task completes, see if we can spawn more work // immediately. 
This ensures that the ParallelTaskSet is @@ -926,7 +990,7 @@ impl BundleCollection { dir: &Utf8Path, ) -> anyhow::Result { if !self.request.include_reconfigurator_data() { - return Ok(CollectionStepOutput::None); + return Ok(CollectionStepOutput::Skipped); } // Collect reconfigurator state @@ -1011,17 +1075,13 @@ impl BundleCollection { dir: &Utf8Path, ) -> anyhow::Result { if !self.request.include_sled_cubby_info() { - return Ok(CollectionStepOutput::None); + return Ok(CollectionStepOutput::Skipped); } let Some(mgs_client) = &**self.get_or_initialize_mgs_client(mgs_client).await else { - warn!( - self.log, - "No MGS client, skipping sled cubby info collection" - ); - return Ok(CollectionStepOutput::None); + bail!("Could not initialize MGS client"); }; let nexus_sleds = self .get_or_initialize_all_sleds(all_sleds) @@ -1040,14 +1100,13 @@ impl BundleCollection { dir: &Utf8Path, ) -> anyhow::Result { if !self.request.include_sp_dumps() { - return Ok(CollectionStepOutput::None); + return Ok(CollectionStepOutput::Skipped); } let Some(mgs_client) = &**self.get_or_initialize_mgs_client(mgs_client).await else { - warn!(self.log, "No MGS client, skipping SP task dump collection"); - return Ok(CollectionStepOutput::None); + bail!("Could not initialize MGS client"); }; let sp_dumps_dir = dir.join("sp_task_dumps"); @@ -1055,9 +1114,9 @@ impl BundleCollection { format!("Failed to create SP task dump directory {sp_dumps_dir}") })?; - let mut extra_steps: Vec<(&'static str, CollectionStepFn)> = vec![]; + let mut extra_steps: Vec = vec![]; for sp in get_available_sps(&mgs_client).await? 
{ - extra_steps.push(( + extra_steps.push(CollectionStep::new( "SP dump", Box::new({ let mgs_client = mgs_client.clone(); @@ -1083,7 +1142,7 @@ impl BundleCollection { dir: &Utf8Path, ) -> anyhow::Result { if !self.request.include_sp_dumps() { - return Ok(CollectionStepOutput::None); + return Ok(CollectionStepOutput::Skipped); } save_sp_dumps(mgs_client, sp, dir).await.with_context(|| { @@ -1124,26 +1183,26 @@ impl BundleCollection { // Shared, lazy, fallible initialization for MGS client let mgs_client: OnceCell>> = OnceCell::new(); - let steps: Vec<(&str, CollectionStepFn)> = vec![ - ( + let steps: Vec = vec![ + CollectionStep::new( "bundle id", Box::new(|collection, dir| { collection.collect_bundle_id(dir).boxed() }), ), - ( + CollectionStep::new( "reconfigurator state", Box::new(|collection, dir| { collection.collect_reconfigurator_state(dir).boxed() }), ), - ( + CollectionStep::new( "ereports", Box::new(|collection, dir| { collection.collect_ereports(dir).boxed() }), ), - ( + CollectionStep::new( "sled cubby info", Box::new({ let all_sleds = all_sleds.clone(); @@ -1162,7 +1221,7 @@ impl BundleCollection { } }), ), - ( + CollectionStep::new( "spawn steps to query all SP dumps", Box::new({ let mgs_client = mgs_client.clone(); @@ -1176,7 +1235,7 @@ impl BundleCollection { } }), ), - ( + CollectionStep::new( "spawn steps to query all sleds", Box::new({ let all_sleds = all_sleds.clone(); @@ -1198,23 +1257,22 @@ impl BundleCollection { all_sleds: &OnceCell>>>, ) -> anyhow::Result { if !self.request.include_host_info() { - return Ok(CollectionStepOutput::None); + return Ok(CollectionStepOutput::Skipped); } let Some(all_sleds) = self.get_or_initialize_all_sleds(all_sleds).await.as_deref() else { - warn!(self.log, "Could not read list of sleds"); - return Ok(CollectionStepOutput::None); + bail!("Could not read list of sleds"); }; - let mut extra_steps: Vec<(&'static str, CollectionStepFn)> = vec![]; + let mut extra_steps: Vec = vec![]; for sled in all_sleds { if 
!self.request.include_sled(sled.id()) { continue; } - extra_steps.push(( + extra_steps.push(CollectionStep::new( "sled data", Box::new({ let sled = sled.clone(); @@ -1245,7 +1303,7 @@ impl BundleCollection { if !self.request.include_host_info() || !self.request.include_sled(sled.id()) { - return Ok(CollectionStepOutput::None); + return Ok(CollectionStepOutput::Skipped); } let log = &self.log; @@ -1272,7 +1330,7 @@ impl BundleCollection { "Could not contact sled", ) .await?; - return Ok(CollectionStepOutput::None); + bail!("Could not contact sled"); }; // NB: As new sled-diagnostic commands are added they should @@ -1386,7 +1444,7 @@ impl BundleCollection { ) -> anyhow::Result { let Some(ref ereport_filters) = self.request.ereport_query else { debug!(self.log, "Support bundle: ereports not requested"); - return Ok(CollectionStepOutput::None); + return Ok(CollectionStepOutput::Skipped); }; let ereports_dir = dir.join("ereports"); let mut status = SupportBundleEreportStatus::default(); diff --git a/nexus/tests/integration_tests/support_bundles.rs b/nexus/tests/integration_tests/support_bundles.rs index 716bc228ca9..75bb4bfa64d 100644 --- a/nexus/tests/integration_tests/support_bundles.rs +++ b/nexus/tests/integration_tests/support_bundles.rs @@ -486,18 +486,18 @@ async fn test_support_bundle_lifecycle(cptestctx: &ControlPlaneTestContext) { output.cleanup_report, Some(SupportBundleCleanupReport { ..Default::default() }) ); + + let report = output.collection_report.as_ref().expect("Missing report"); + assert_eq!(report.bundle, bundle.id); + assert!(report.listed_in_service_sleds); + assert!(report.listed_sps); + assert!(report.activated_in_db_ok); assert_eq!( - output.collection_report, - Some(SupportBundleCollectionReport { - bundle: bundle.id, - listed_in_service_sleds: true, - listed_sps: true, - activated_in_db_ok: true, - ereports: Some(SupportBundleEreportStatus { - n_collected: 0, - n_found: 0, - errors: Vec::new() - }) + report.ereports, + 
Some(SupportBundleEreportStatus { + n_collected: 0, + n_found: 0, + errors: Vec::new() }) ); let bundle = bundle_get(&client, bundle.id).await.unwrap(); @@ -588,18 +588,17 @@ async fn test_support_bundle_range_requests( // Finish collection, activate the bundle. let output = activate_bundle_collection_background_task(&cptestctx).await; assert_eq!(output.collection_err, None); + let report = output.collection_report.as_ref().expect("Missing report"); + assert_eq!(report.bundle, bundle.id); + assert!(report.listed_in_service_sleds); + assert!(report.listed_sps); + assert!(report.activated_in_db_ok); assert_eq!( - output.collection_report, - Some(SupportBundleCollectionReport { - bundle: bundle.id, - listed_in_service_sleds: true, - listed_sps: true, - activated_in_db_ok: true, - ereports: Some(SupportBundleEreportStatus { - n_collected: 0, - n_found: 0, - errors: Vec::new() - }) + report.ereports, + Some(SupportBundleEreportStatus { + n_collected: 0, + n_found: 0, + errors: Vec::new() }) ); let bundle = bundle_get(&client, bundle.id).await.unwrap(); diff --git a/nexus/types/src/internal_api/background.rs b/nexus/types/src/internal_api/background.rs index 08d343182e6..42264d0411b 100644 --- a/nexus/types/src/internal_api/background.rs +++ b/nexus/types/src/internal_api/background.rs @@ -281,11 +281,41 @@ pub struct SupportBundleCollectionReport { /// True iff the bundle was successfully made 'active' in the database. pub activated_in_db_ok: bool, + /// All steps taken, alongside their timing information, when collecting the + /// bundle. + pub steps: Vec, + /// Status of ereport collection, or `None` if no ereports were requested /// for this support bundle. 
pub ereports: Option, } +#[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] +pub struct SupportBundleCollectionStep { + pub name: String, + pub start: DateTime, + pub end: DateTime, + pub status: SupportBundleCollectionStepStatus, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] +pub enum SupportBundleCollectionStepStatus { + Ok, + Skipped, + Failed(String), +} + +impl std::fmt::Display for SupportBundleCollectionStepStatus { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + use SupportBundleCollectionStepStatus::*; + match self { + Ok => write!(f, "ok"), + Skipped => write!(f, "skipped"), + Failed(why) => write!(f, "failed: {why}"), + } + } +} + #[derive(Debug, Default, Deserialize, Serialize, PartialEq, Eq)] pub struct SupportBundleEreportStatus { /// The total number of ereports found that match the requested filters. @@ -309,6 +339,7 @@ impl SupportBundleCollectionReport { listed_in_service_sleds: false, listed_sps: false, activated_in_db_ok: false, + steps: vec![], ereports: None, } } From 4d59114117248febe5ca8a1aa8981fa9f0d7b289 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 2 Dec 2025 17:11:36 -0800 Subject: [PATCH 6/8] Better table output, step labels --- dev-tools/omdb/src/bin/omdb/nexus.rs | 36 +++++++++++++------ .../tasks/support_bundle_collector.rs | 4 +-- .../integration_tests/support_bundles.rs | 22 ++++++++++++ 3 files changed, 50 insertions(+), 12 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index d8d6bab1bc7..9833a7bf141 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -2626,17 +2626,33 @@ fn print_task_support_bundle_collector(details: &serde_json::Value) { " Bundle was able to list service processors: {listed_sps}" ); + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct StepRow { + step_name: String, + start_time: String, + duration: String, + status: String, + } + 
steps.sort_unstable_by_key(|s| s.start); - for step in steps { - let duration = (step.end - step.start) - .to_std() - .unwrap_or(Duration::from_millis(0)); - println!( - " Step {} ({}ms): {}", - step.name, - duration.as_millis(), - step.status - ); + let rows: Vec = steps + .into_iter() + .map(|step| { + let duration = (step.end - step.start) + .to_std() + .unwrap_or(Duration::from_millis(0)); + StepRow { + step_name: step.name, + start_time: step.start.to_rfc3339(), + duration: format!("{:.3}s", duration.as_secs_f64()), + status: step.status.to_string(), + } + }) + .collect(); + + if !rows.is_empty() { + println!("\n{}", tabled::Table::new(rows)); } println!( " Bundle was activated in the database: {activated_in_db_ok}" diff --git a/nexus/src/app/background/tasks/support_bundle_collector.rs b/nexus/src/app/background/tasks/support_bundle_collector.rs index 2b63ac6e93b..41babc3838b 100644 --- a/nexus/src/app/background/tasks/support_bundle_collector.rs +++ b/nexus/src/app/background/tasks/support_bundle_collector.rs @@ -1117,7 +1117,7 @@ impl BundleCollection { let mut extra_steps: Vec = vec![]; for sp in get_available_sps(&mgs_client).await? 
{ extra_steps.push(CollectionStep::new( - "SP dump", + format!("SP dump for {:?}", sp), Box::new({ let mgs_client = mgs_client.clone(); move |collection, dir| { @@ -1273,7 +1273,7 @@ impl BundleCollection { } extra_steps.push(CollectionStep::new( - "sled data", + format!("sled data for sled {}", sled.id()), Box::new({ let sled = sled.clone(); move |collection, dir| { diff --git a/nexus/tests/integration_tests/support_bundles.rs b/nexus/tests/integration_tests/support_bundles.rs index 75bb4bfa64d..ade2cbdb2c9 100644 --- a/nexus/tests/integration_tests/support_bundles.rs +++ b/nexus/tests/integration_tests/support_bundles.rs @@ -500,6 +500,17 @@ async fn test_support_bundle_lifecycle(cptestctx: &ControlPlaneTestContext) { errors: Vec::new() }) ); + + // Verify that steps were recorded with reasonable timing data + assert!(!report.steps.is_empty(), "Should have recorded some steps"); + for step in &report.steps { + assert!( + step.end >= step.start, + "Step '{}' end time should be >= start time", + step.name + ); + } + let bundle = bundle_get(&client, bundle.id).await.unwrap(); assert_eq!(bundle.state, SupportBundleState::Active); @@ -601,6 +612,17 @@ async fn test_support_bundle_range_requests( errors: Vec::new() }) ); + + // Verify that steps were recorded with reasonable timing data + assert!(!report.steps.is_empty(), "Should have recorded some steps"); + for step in &report.steps { + assert!( + step.end >= step.start, + "Step '{}' end time should be >= start time", + step.name + ); + } + let bundle = bundle_get(&client, bundle.id).await.unwrap(); assert_eq!(bundle.state, SupportBundleState::Active); From e69f392cae63f102cf5ff40752db914eaeda1666 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 3 Dec 2025 12:58:08 -0800 Subject: [PATCH 7/8] [support bundle] Simplify report, relying on new 'steps' infrastructure --- dev-tools/omdb/src/bin/omdb/nexus.rs | 8 -- .../tasks/support_bundle_collector.rs | 94 ++++++++++++------- .../integration_tests/support_bundles.rs | 
29 +++++- nexus/types/src/internal_api/background.rs | 21 +++-- 4 files changed, 97 insertions(+), 55 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 9833a7bf141..b17751821a3 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -2610,8 +2610,6 @@ fn print_task_support_bundle_collector(details: &serde_json::Value) { if let Some(SupportBundleCollectionReport { bundle, - listed_in_service_sleds, - listed_sps, activated_in_db_ok, mut steps, ereports, @@ -2619,12 +2617,6 @@ fn print_task_support_bundle_collector(details: &serde_json::Value) { { println!(" Support Bundle Collection Report:"); println!(" Bundle ID: {bundle}"); - println!( - " Bundle was able to list in-service sleds: {listed_in_service_sleds}" - ); - println!( - " Bundle was able to list service processors: {listed_sps}" - ); #[derive(Tabled)] #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] diff --git a/nexus/src/app/background/tasks/support_bundle_collector.rs b/nexus/src/app/background/tasks/support_bundle_collector.rs index 41babc3838b..5eb9b6f6370 100644 --- a/nexus/src/app/background/tasks/support_bundle_collector.rs +++ b/nexus/src/app/background/tasks/support_bundle_collector.rs @@ -629,15 +629,6 @@ impl CompletedCollectionStep { report.ereports = Some(status); Status::Ok } - CollectionStepOutput::SavingSpDumps { listed_sps } => { - report.listed_sps = listed_sps; - Status::Ok - } - CollectionStepOutput::SpawnSleds { extra_steps } => { - report.listed_in_service_sleds = true; - steps.extend(extra_steps); - Status::Ok - } CollectionStepOutput::Spawn { extra_steps } => { steps.extend(extra_steps); Status::Ok @@ -664,14 +655,7 @@ enum CollectionStepOutput { // It may have still saved a partial set of data to the bundle. 
Failed(anyhow::Error), Ereports(SupportBundleEreportStatus), - SavingSpDumps { listed_sps: bool }, - // NOTE: The distinction between this and "Spawn" is pretty artificial - - // it's just to preserve a part of the report which says "we tried to - // list in-service sleds". - // - // If we changed the collection report, this could easily be combined - // with the "Spawn" variant. - SpawnSleds { extra_steps: Vec }, + // The step spawned additional steps to execute Spawn { extra_steps: Vec }, // The step completed with nothing to report, and no follow-up steps None, @@ -1149,7 +1133,7 @@ impl BundleCollection { format!("failed to save SP dump from: {} {}", sp.type_, sp.slot) })?; - Ok(CollectionStepOutput::SavingSpDumps { listed_sps: true }) + Ok(CollectionStepOutput::None) } // Perform the work of collecting the support bundle into a temporary directory @@ -1185,25 +1169,25 @@ impl BundleCollection { let steps: Vec = vec![ CollectionStep::new( - "bundle id", + SupportBundleCollectionStep::STEP_BUNDLE_ID, Box::new(|collection, dir| { collection.collect_bundle_id(dir).boxed() }), ), CollectionStep::new( - "reconfigurator state", + SupportBundleCollectionStep::STEP_RECONFIGURATOR_STATE, Box::new(|collection, dir| { collection.collect_reconfigurator_state(dir).boxed() }), ), CollectionStep::new( - "ereports", + SupportBundleCollectionStep::STEP_EREPORTS, Box::new(|collection, dir| { collection.collect_ereports(dir).boxed() }), ), CollectionStep::new( - "sled cubby info", + SupportBundleCollectionStep::STEP_SLED_CUBBY_INFO, Box::new({ let all_sleds = all_sleds.clone(); let mgs_client = mgs_client.clone(); @@ -1222,7 +1206,7 @@ impl BundleCollection { }), ), CollectionStep::new( - "spawn steps to query all SP dumps", + SupportBundleCollectionStep::STEP_SPAWN_SP_DUMPS, Box::new({ let mgs_client = mgs_client.clone(); move |collection, dir| { @@ -1236,7 +1220,7 @@ impl BundleCollection { }), ), CollectionStep::new( - "spawn steps to query all sleds", + 
SupportBundleCollectionStep::STEP_SPAWN_SLEDS, Box::new({ let all_sleds = all_sleds.clone(); move |collection, _| { @@ -1286,7 +1270,7 @@ impl BundleCollection { )); } - return Ok(CollectionStepOutput::SpawnSleds { extra_steps }); + return Ok(CollectionStepOutput::Spawn { extra_steps }); } // Collect data from a sled, storing it into a directory that will @@ -2425,8 +2409,16 @@ mod test { .expect("Collection should have succeeded under test") .expect("Collecting the bundle should have generated a report"); assert_eq!(report.bundle, bundle.id.into()); - assert!(report.listed_in_service_sleds); - assert!(report.listed_sps); + // Verify that we spawned steps to query sleds and SPs + let step_names: Vec<_> = + report.steps.iter().map(|s| s.name.as_str()).collect(); + assert!( + step_names.contains(&SupportBundleCollectionStep::STEP_SPAWN_SLEDS) + ); + assert!( + step_names + .contains(&SupportBundleCollectionStep::STEP_SPAWN_SP_DUMPS) + ); assert!(report.activated_in_db_ok); assert_eq!( report.ereports, @@ -2502,8 +2494,16 @@ mod test { .expect("Collection should have succeeded under test") .expect("Collecting the bundle should have generated a report"); assert_eq!(report.bundle, bundle.id.into()); - assert!(report.listed_in_service_sleds); - assert!(report.listed_sps); + // Verify that we spawned steps to query sleds and SPs + let step_names: Vec<_> = + report.steps.iter().map(|s| s.name.as_str()).collect(); + assert!( + step_names.contains(&SupportBundleCollectionStep::STEP_SPAWN_SLEDS) + ); + assert!( + step_names + .contains(&SupportBundleCollectionStep::STEP_SPAWN_SP_DUMPS) + ); assert!(report.activated_in_db_ok); let observed_bundle = datastore @@ -2591,8 +2591,16 @@ mod test { .expect("Collection should have succeeded under test") .expect("Collecting the bundle should have generated a report"); assert_eq!(report.bundle, bundle1.id.into()); - assert!(report.listed_in_service_sleds); - assert!(report.listed_sps); + // Verify that we spawned steps to query sleds 
and SPs + let step_names: Vec<_> = + report.steps.iter().map(|s| s.name.as_str()).collect(); + assert!( + step_names.contains(&SupportBundleCollectionStep::STEP_SPAWN_SLEDS) + ); + assert!( + step_names + .contains(&SupportBundleCollectionStep::STEP_SPAWN_SP_DUMPS) + ); assert!(report.activated_in_db_ok); // This is observable by checking the state of bundle1 and bundle2: @@ -2614,8 +2622,16 @@ mod test { .expect("Collection should have succeeded under test") .expect("Collecting the bundle should have generated a report"); assert_eq!(report.bundle, bundle2.id.into()); - assert!(report.listed_in_service_sleds); - assert!(report.listed_sps); + // Verify that we spawned steps to query sleds and SPs + let step_names: Vec<_> = + report.steps.iter().map(|s| s.name.as_str()).collect(); + assert!( + step_names.contains(&SupportBundleCollectionStep::STEP_SPAWN_SLEDS) + ); + assert!( + step_names + .contains(&SupportBundleCollectionStep::STEP_SPAWN_SP_DUMPS) + ); assert!(report.activated_in_db_ok); // After another collection request, we'll see that both bundles have @@ -2742,8 +2758,16 @@ mod test { .expect("Collection should have succeeded under test") .expect("Collecting the bundle should have generated a report"); assert_eq!(report.bundle, bundle.id.into()); - assert!(report.listed_in_service_sleds); - assert!(report.listed_sps); + // Verify that we spawned steps to query sleds and SPs + let step_names: Vec<_> = + report.steps.iter().map(|s| s.name.as_str()).collect(); + assert!( + step_names.contains(&SupportBundleCollectionStep::STEP_SPAWN_SLEDS) + ); + assert!( + step_names + .contains(&SupportBundleCollectionStep::STEP_SPAWN_SP_DUMPS) + ); assert!(report.activated_in_db_ok); // Cancel the bundle after collection has completed diff --git a/nexus/tests/integration_tests/support_bundles.rs b/nexus/tests/integration_tests/support_bundles.rs index ade2cbdb2c9..80ec8af191f 100644 --- a/nexus/tests/integration_tests/support_bundles.rs +++ 
b/nexus/tests/integration_tests/support_bundles.rs @@ -19,6 +19,7 @@ use nexus_types::external_api::shared::SupportBundleInfo; use nexus_types::external_api::shared::SupportBundleState; use nexus_types::internal_api::background::SupportBundleCleanupReport; use nexus_types::internal_api::background::SupportBundleCollectionReport; +use nexus_types::internal_api::background::SupportBundleCollectionStep; use nexus_types::internal_api::background::SupportBundleEreportStatus; use omicron_uuid_kinds::SupportBundleUuid; use serde::Deserialize; @@ -489,8 +490,6 @@ async fn test_support_bundle_lifecycle(cptestctx: &ControlPlaneTestContext) { let report = output.collection_report.as_ref().expect("Missing report"); assert_eq!(report.bundle, bundle.id); - assert!(report.listed_in_service_sleds); - assert!(report.listed_sps); assert!(report.activated_in_db_ok); assert_eq!( report.ereports, @@ -511,6 +510,18 @@ async fn test_support_bundle_lifecycle(cptestctx: &ControlPlaneTestContext) { ); } + // Verify that we successfully spawned steps to query sleds and SPs + let step_names: Vec<_> = + report.steps.iter().map(|s| s.name.as_str()).collect(); + assert!( + step_names.contains(&SupportBundleCollectionStep::STEP_SPAWN_SLEDS), + "Should have attempted to list in-service sleds" + ); + assert!( + step_names.contains(&SupportBundleCollectionStep::STEP_SPAWN_SP_DUMPS), + "Should have attempted to list service processors" + ); + let bundle = bundle_get(&client, bundle.id).await.unwrap(); assert_eq!(bundle.state, SupportBundleState::Active); @@ -601,8 +612,6 @@ async fn test_support_bundle_range_requests( assert_eq!(output.collection_err, None); let report = output.collection_report.as_ref().expect("Missing report"); assert_eq!(report.bundle, bundle.id); - assert!(report.listed_in_service_sleds); - assert!(report.listed_sps); assert!(report.activated_in_db_ok); assert_eq!( report.ereports, @@ -623,6 +632,18 @@ async fn test_support_bundle_range_requests( ); } + // Verify that we 
successfully spawned steps to query sleds and SPs + let step_names: Vec<_> = + report.steps.iter().map(|s| s.name.as_str()).collect(); + assert!( + step_names.contains(&SupportBundleCollectionStep::STEP_SPAWN_SLEDS), + "Should have attempted to list in-service sleds" + ); + assert!( + step_names.contains(&SupportBundleCollectionStep::STEP_SPAWN_SP_DUMPS), + "Should have attempted to list service processors" + ); + let bundle = bundle_get(&client, bundle.id).await.unwrap(); assert_eq!(bundle.state, SupportBundleState::Active); diff --git a/nexus/types/src/internal_api/background.rs b/nexus/types/src/internal_api/background.rs index 42264d0411b..dfe008198f9 100644 --- a/nexus/types/src/internal_api/background.rs +++ b/nexus/types/src/internal_api/background.rs @@ -272,12 +272,6 @@ pub struct SupportBundleCleanupReport { pub struct SupportBundleCollectionReport { pub bundle: SupportBundleUuid, - /// True iff we could list in-service sleds - pub listed_in_service_sleds: bool, - - /// True iff we could list the service processors. - pub listed_sps: bool, - /// True iff the bundle was successfully made 'active' in the database. pub activated_in_db_ok: bool, @@ -298,6 +292,19 @@ pub struct SupportBundleCollectionStep { pub status: SupportBundleCollectionStepStatus, } +impl SupportBundleCollectionStep { + /// Step name constants for the main collection steps. + /// + /// These are used both when creating steps and when validating in tests. 
+ pub const STEP_BUNDLE_ID: &'static str = "bundle id"; + pub const STEP_RECONFIGURATOR_STATE: &'static str = "reconfigurator state"; + pub const STEP_EREPORTS: &'static str = "ereports"; + pub const STEP_SLED_CUBBY_INFO: &'static str = "sled cubby info"; + pub const STEP_SPAWN_SP_DUMPS: &'static str = + "spawn steps to query all SP dumps"; + pub const STEP_SPAWN_SLEDS: &'static str = "spawn steps to query all sleds"; +} + #[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] pub enum SupportBundleCollectionStepStatus { Ok, @@ -336,8 +343,6 @@ impl SupportBundleCollectionReport { pub fn new(bundle: SupportBundleUuid) -> Self { Self { bundle, - listed_in_service_sleds: false, - listed_sps: false, activated_in_db_ok: false, steps: vec![], ereports: None, From 1886293ab4d50633bea50058e848bdf70ba1f64c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 3 Dec 2025 13:25:00 -0800 Subject: [PATCH 8/8] improve comment --- nexus/src/app/background/tasks/support_bundle_collector.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nexus/src/app/background/tasks/support_bundle_collector.rs b/nexus/src/app/background/tasks/support_bundle_collector.rs index 0adc94f37e1..34aa31862c4 100644 --- a/nexus/src/app/background/tasks/support_bundle_collector.rs +++ b/nexus/src/app/background/tasks/support_bundle_collector.rs @@ -124,10 +124,10 @@ struct BundleRequest { // The set of data to be included within this bundle. data_selection: HashSet, - // The set of sets to be included within this bundle. + // The set of sleds to be included within this bundle. // // NOTE: This selection is only considered if "data_selection" requests - // data from specific sleds. + // data from sleds. sled_selection: HashSet, // The set of ereports to be included within this bundle.