Skip to content

Add FMD fault inventory to sled-agent API #10283

Open
smklein wants to merge 14 commits into main from
add-fmd-inventory
Open

Add FMD fault inventory to sled-agent API #10283
smklein wants to merge 14 commits into main from
add-fmd-inventory

Conversation

@smklein
Copy link
Copy Markdown
Collaborator

@smklein smklein commented Apr 16, 2026

Exposes data from fmd-adm through the sled-agent inventory endpoint.

We're extracting:

  • Cases: diagnosed faults with UUID, diagnostic code, URL, and the full event nvlist serialized as JSON
  • Resources: affected components with FMRI, fault status flags

I'm only exposing this through the API right now - Nexus isn't yet shoving it into the DB. Soon! But I wanted feedback on this data first.

To give you a sense of "what does case/resource data look like", here's what I pulled out of Atrium, using fmd-adm:

{
  "type": "available",
  "value": {
    "cases": [
      {
        "code": "SUNOS-8000-KL",
        "event": {
          "class": "list.suspect",
          "code": "SUNOS-8000-KL",
          "de": {
            "authority": {
              "chassis-id": "DL9016712A0001",
              "product-id": "R152-Z32-00",
              "server-id": "atrium",
              "version": 0
            },
            "mod-name": "software-diagnosis",
            "mod-version": "0.1",
            "scheme": "fmd",
            "version": 0
          },
          "diag-time": [
            1667146378,
            127967
          ],
          "fault-list": [
            {
              "asru": {
                "object": {
                  "path": "/var/crash/atrium/.359346d5-c134-c44c-b0fa-db4a08a292d4"
                },
                "scheme": "sw",
                "version": 0
              },
              "certainty": 100,
              "class": "defect.sunos.kernel.panic",
              "crashtime": 1667146083,
              "dump-dir": "/var/crash/atrium",
              "dump-files": [
                "vmdump.2"
              ],
              "os-instance-uuid": "359346d5-c134-c44c-b0fa-db4a08a292d4",
              "panic-time": "Sun Oct 30 16:08:03 2022 UTC",
              "panicstack": "genunix:kadmin+627 () | genunix:uadmin+17d () | unix:brand_sys_syscall32+186 () | ",
              "panicstr": "forced crash dump initiated at user request",
              "resource": {
                "object": {
                  "path": "/var/crash/atrium/.359346d5-c134-c44c-b0fa-db4a08a292d4"
                },
                "scheme": "sw",
                "version": 0
              },
              "savecore-succcess": true,
              "version": 0
            }
          ],
          "fault-list-sz": 1,
          "fault-status": [
            1
          ],
          "severity": "Major",
          "uuid": "359346d5-c134-c44c-b0fa-db4a08a292d4",
          "version": 0
        },
        "url": "http://illumos.org/msg/SUNOS-8000-KL",
        "uuid": "359346d5-c134-c44c-b0fa-db4a08a292d4"
      },
      {
        "code": "PCIEX-8000-DJ",
        "event": {
          "class": "list.suspect",
          "code": "PCIEX-8000-DJ",
          "de": {
            "authority": {
              "chassis-id": "DL9016712A0001",
              "product-id": "R152-Z32-00",
              "server-id": "atrium",
              "version": 0
            },
            "mod-name": "eft",
            "mod-version": "1.16",
            "scheme": "fmd",
            "version": 0
          },
          "diag-time": [
            1729703082,
            937698
          ],
          "fault-list": [
            {
              "asru": {
                "device-path": "/pci@af,0/pci1022,1483@3,5/pci1458,0@0",
                "scheme": "dev",
                "version": 0
              },
              "certainty": 40,
              "class": "fault.io.pciex.device-interr",
              "fru": {
                "authority": {
                  "chassis-id": "DL9016712A0001",
                  "product-id": "R152-Z32-00",
                  "server-id": "atrium"
                },
                "hc-list": [
                  {
                    "hc-id": "0",
                    "hc-name": "motherboard"
                  }
                ],
                "hc-root": "",
                "scheme": "hc",
                "version": 0
              },
              "location": "MB",
              "resource": {
                "authority": {
                  "chassis-id": "DL9016712A0001",
                  "product-id": "R152-Z32-00",
                  "server-id": "atrium"
                },
                "hc-list": [
                  {
                    "hc-id": "0",
                    "hc-name": "motherboard"
                  },
                  {
                    "hc-id": "19",
                    "hc-name": "hostbridge"
                  },
                  {
                    "hc-id": "19",
                    "hc-name": "pciexrc"
                  },
                  {
                    "hc-id": "195",
                    "hc-name": "pciexbus"
                  },
                  {
                    "hc-id": "0",
                    "hc-name": "pciexdev"
                  },
                  {
                    "hc-id": "0",
                    "hc-name": "pciexfn"
                  }
                ],
                "hc-list-sz": 6,
                "hc-root": "",
                "scheme": "hc",
                "version": 0
              },
              "version": 0
            },
            {
              "asru": {
                "device-path": "/pci@af,0/pci1022,1483@3,5",
                "scheme": "dev",
                "version": 0
              },
              "certainty": 20,
              "class": "fault.io.pciex.device-interr",
              "fru": {
                "authority": {
                  "chassis-id": "DL9016712A0001",
                  "product-id": "R152-Z32-00",
                  "server-id": "atrium"
                },
                "hc-list": [
                  {
                    "hc-id": "0",
                    "hc-name": "motherboard"
                  }
                ],
                "hc-root": "",
                "scheme": "hc",
                "version": 0
              },
              "location": "MB",
              "resource": {
                "authority": {
                  "chassis-id": "DL9016712A0001",
                  "product-id": "R152-Z32-00",
                  "server-id": "atrium"
                },
                "hc-list": [
                  {
                    "hc-id": "0",
                    "hc-name": "motherboard"
                  },
                  {
                    "hc-id": "19",
                    "hc-name": "hostbridge"
                  },
                  {
                    "hc-id": "19",
                    "hc-name": "pciexrc"
                  }
                ],
                "hc-list-sz": 3,
                "hc-root": "",
                "scheme": "hc",
                "version": 0
              },
              "version": 0
            },
            {
              "asru": {
                "device-path": "/pci@af,0/pci1022,1483@3,5/pci1458,0@0",
                "scheme": "dev",
                "version": 0
              },
              "certainty": 20,
              "class": "fault.io.pciex.bus-noresp",
              "fru": {
                "authority": {
                  "chassis-id": "DL9016712A0001",
                  "product-id": "R152-Z32-00",
                  "server-id": "atrium"
                },
                "hc-list": [
                  {
                    "hc-id": "0",
                    "hc-name": "motherboard"
                  }
                ],
                "hc-root": "",
                "scheme": "hc",
                "version": 0
              },
              "location": "MB",
              "resource": {
                "authority": {
                  "chassis-id": "DL9016712A0001",
                  "product-id": "R152-Z32-00",
                  "server-id": "atrium"
                },
                "hc-list": [
                  {
                    "hc-id": "0",
                    "hc-name": "motherboard"
                  },
                  {
                    "hc-id": "19",
                    "hc-name": "hostbridge"
                  },
                  {
                    "hc-id": "19",
                    "hc-name": "pciexrc"
                  },
                  {
                    "hc-id": "195",
                    "hc-name": "pciexbus"
                  },
                  {
                    "hc-id": "0",
                    "hc-name": "pciexdev"
                  },
                  {
                    "hc-id": "0",
                    "hc-name": "pciexfn"
                  }
                ],
                "hc-list-sz": 6,
                "hc-root": "",
                "scheme": "hc",
                "version": 0
              },
              "version": 0
            },
            {
              "asru": {
                "device-path": "/pci@af,0/pci1022,1483@3,5",
                "scheme": "dev",
                "version": 0
              },
              "certainty": 20,
              "class": "fault.io.pciex.device-noresp",
              "fru": {
                "authority": {
                  "chassis-id": "DL9016712A0001",
                  "product-id": "R152-Z32-00",
                  "server-id": "atrium"
                },
                "hc-list": [
                  {
                    "hc-id": "0",
                    "hc-name": "motherboard"
                  }
                ],
                "hc-root": "",
                "scheme": "hc",
                "version": 0
              },
              "location": "MB",
              "resource": {
                "authority": {
                  "chassis-id": "DL9016712A0001",
                  "product-id": "R152-Z32-00",
                  "server-id": "atrium"
                },
                "hc-list": [
                  {
                    "hc-id": "0",
                    "hc-name": "motherboard"
                  },
                  {
                    "hc-id": "19",
                    "hc-name": "hostbridge"
                  },
                  {
                    "hc-id": "19",
                    "hc-name": "pciexrc"
                  }
                ],
                "hc-list-sz": 3,
                "hc-root": "",
                "scheme": "hc",
                "version": 0
              },
              "version": 0
            }
          ],
          "fault-list-sz": 4,
          "fault-status": [
            1,
            1,
            1,
            1
          ],
          "severity": "Major",
          "uuid": "71b830c4-cef2-410b-afc8-9c6f504a3c02",
          "version": 0
        },
        "url": "http://illumos.org/msg/PCIEX-8000-DJ",
        "uuid": "71b830c4-cef2-410b-afc8-9c6f504a3c02"
      },
      {
        "code": "SUNOS-8000-KL",
        "event": {
          "class": "list.suspect",
          "code": "SUNOS-8000-KL",
          "de": {
            "authority": {
              "chassis-id": "DL9016712A0001",
              "product-id": "R152-Z32-00",
              "server-id": "atrium",
              "version": 0
            },
            "mod-name": "software-diagnosis",
            "mod-version": "0.1",
            "scheme": "fmd",
            "version": 0
          },
          "diag-time": [
            1644520051,
            664144
          ],
          "fault-list": [
            {
              "asru": {
                "object": {
                  "path": "/var/crash/atrium/.8fbb2f00-47e0-ef18-b56d-d5475cae27f2"
                },
                "scheme": "sw",
                "version": 0
              },
              "certainty": 100,
              "class": "defect.sunos.kernel.panic",
              "crashtime": 1644519866,
              "dump-dir": "/var/crash/atrium",
              "dump-files": [
                "vmdump.0"
              ],
              "os-instance-uuid": "8fbb2f00-47e0-ef18-b56d-d5475cae27f2",
              "panic-time": "Thu Feb 10 19:04:26 2022 UTC",
              "panicstack": "unix:real_mode_stop_cpu_stage2_end+c60d () | unix:trap+1169 () | unix:cmntrap+e9 () | unix:bcopy+368 () | kstat:read_kstat_data+1c6 () | kstat:kstat_ioctl+5b () | genunix:cdev_ioctl+2b () | specfs:spec_ioctl+45 () | genunix:fop_ioctl+5b () | genunix:ioctl+153 () | unix:brand_sys_syscall32+186 () | ",
              "panicstr": "BAD TRAP: type=e (#pf Page fault) rp=fffffe00f5df4910 addr=fffffeb691b5139c",
              "resource": {
                "object": {
                  "path": "/var/crash/atrium/.8fbb2f00-47e0-ef18-b56d-d5475cae27f2"
                },
                "scheme": "sw",
                "version": 0
              },
              "savecore-succcess": true,
              "version": 0
            }
          ],
          "fault-list-sz": 1,
          "fault-status": [
            1
          ],
          "severity": "Major",
          "uuid": "8fbb2f00-47e0-ef18-b56d-d5475cae27f2",
          "version": 0
        },
        "url": "http://illumos.org/msg/SUNOS-8000-KL",
        "uuid": "8fbb2f00-47e0-ef18-b56d-d5475cae27f2"
      },
      {
        "code": "SUNOS-8000-KL",
        "event": {
          "class": "list.suspect",
          "code": "SUNOS-8000-KL",
          "de": {
            "authority": {
              "chassis-id": "DL9016712A0001",
              "product-id": "R152-Z32-00",
              "server-id": "atrium",
              "version": 0
            },
            "mod-name": "software-diagnosis",
            "mod-version": "0.1",
            "scheme": "fmd",
            "version": 0
          },
          "diag-time": [
            1648150968,
            960574
          ],
          "fault-list": [
            {
              "asru": {
                "object": {
                  "path": "/var/crash/atrium/.934d446d-d1db-4f12-88f2-eadd1d0cae22"
                },
                "scheme": "sw",
                "version": 0
              },
              "certainty": 100,
              "class": "defect.sunos.kernel.panic",
              "crashtime": 1648150784,
              "dump-dir": "/var/crash/atrium",
              "dump-files": [
                "vmdump.1"
              ],
              "os-instance-uuid": "934d446d-d1db-4f12-88f2-eadd1d0cae22",
              "panic-time": "Thu Mar 24 19:39:44 2022 UTC",
              "panicstack": "unix:real_mode_stop_cpu_stage2_end+c60d () | unix:trap+1169 () | unix:cmntrap+e9 () | vmm:vmm_kstat_update_vcpu+23 () | kstat:read_kstat_data+f5 () | kstat:kstat_ioctl+5b () | genunix:cdev_ioctl+2b () | specfs:spec_ioctl+45 () | genunix:fop_ioctl+5b () | genunix:ioctl+153 () | unix:brand_sys_syscall32+186 () | ",
              "panicstr": "BAD TRAP: type=e (#pf Page fault) rp=fffffe00f734a950 addr=fffffeb1e67c43d0",
              "resource": {
                "object": {
                  "path": "/var/crash/atrium/.934d446d-d1db-4f12-88f2-eadd1d0cae22"
                },
                "scheme": "sw",
                "version": 0
              },
              "savecore-succcess": true,
              "version": 0
            }
          ],
          "fault-list-sz": 1,
          "fault-status": [
            1
          ],
          "severity": "Major",
          "uuid": "934d446d-d1db-4f12-88f2-eadd1d0cae22",
          "version": 0
        },
        "url": "http://illumos.org/msg/SUNOS-8000-KL",
        "uuid": "934d446d-d1db-4f12-88f2-eadd1d0cae22"
      },
      {
        "code": "SUNOS-8000-J0",
        "event": {
          "class": "list.suspect",
          "code": "SUNOS-8000-J0",
          "de": {
            "authority": {
              "chassis-id": "DL9016712A0001",
              "product-id": "R152-Z32-00",
              "server-id": "atrium",
              "version": 0
            },
            "mod-name": "eft",
            "mod-version": "1.16",
            "scheme": "fmd",
            "version": 0
          },
          "diag-time": [
            1729703082,
            591677
          ],
          "fault-list": [
            {
              "certainty": 50,
              "class": "defect.sunos.eft.unexpected_telemetry",
              "reason": "no valid path to component was found in ereport.io.pciex.rc.nfe-msg",
              "resource": {
                "device-path": "/pci@af,0",
                "scheme": "dev",
                "version": 0
              },
              "response": false,
              "retire": false,
              "version": 0
            },
            {
              "certainty": 50,
              "class": "fault.sunos.eft.unexpected_telemetry",
              "reason": "no valid path to component was found in ereport.io.pciex.rc.nfe-msg",
              "resource": {
                "device-path": "/pci@af,0",
                "scheme": "dev",
                "version": 0
              },
              "response": false,
              "retire": false,
              "version": 0
            }
          ],
          "fault-list-sz": 2,
          "fault-status": [
            3,
            3
          ],
          "severity": "Major",
          "uuid": "cb7808a1-0ae4-4609-859f-772b541fdafb",
          "version": 0
        },
        "url": "http://illumos.org/msg/SUNOS-8000-J0",
        "uuid": "cb7808a1-0ae4-4609-859f-772b541fdafb"
      },
      {
        "code": "SUNOS-8000-KL",
        "event": {
          "class": "list.suspect",
          "code": "SUNOS-8000-KL",
          "de": {
            "authority": {
              "chassis-id": "DL9016712A0001",
              "product-id": "R152-Z32-00",
              "server-id": "atrium",
              "version": 0
            },
            "mod-name": "software-diagnosis",
            "mod-version": "0.1",
            "scheme": "fmd",
            "version": 0
          },
          "diag-time": [
            1685805999,
            471268
          ],
          "fault-list": [
            {
              "asru": {
                "object": {
                  "path": "/var/crash/atrium/.f389ce27-4486-e994-9c34-c5836914f27f"
                },
                "scheme": "sw",
                "version": 0
              },
              "certainty": 100,
              "class": "defect.sunos.kernel.panic",
              "crashtime": 1685805741,
              "dump-dir": "/var/crash/atrium",
              "dump-files": [
                "vmdump.3"
              ],
              "os-instance-uuid": "f389ce27-4486-e994-9c34-c5836914f27f",
              "panic-time": "Sat Jun  3 15:22:21 2023 UTC",
              "panicstack": "fffffffff78b52f3 () | unix:av_dispatch_nmivect+32 () | unix:nmiint+155 () | unix:i86_mwait+12 () | unix:cpu_idle_mwait+14b () | unix:cpu_idle_adaptive+19 () | unix:idle+11b () | unix:thread_start+b () | ",
              "panicstr": "NMI received\n",
              "resource": {
                "object": {
                  "path": "/var/crash/atrium/.f389ce27-4486-e994-9c34-c5836914f27f"
                },
                "scheme": "sw",
                "version": 0
              },
              "savecore-succcess": true,
              "version": 0
            }
          ],
          "fault-list-sz": 1,
          "fault-status": [
            1
          ],
          "severity": "Major",
          "uuid": "f389ce27-4486-e994-9c34-c5836914f27f",
          "version": 0
        },
        "url": "http://illumos.org/msg/SUNOS-8000-KL",
        "uuid": "f389ce27-4486-e994-9c34-c5836914f27f"
      }
    ],
    "resources": [
      {
        "case_id": "71b830c4-cef2-410b-afc8-9c6f504a3c02",
        "faulty": true,
        "fmri": "dev:////pci@af,0/pci1022,1483@3,5",
        "invisible": false,
        "unusable": false,
        "uuid": "d41964aa-62da-480b-bff1-35e0d442843c"
      },
      {
        "case_id": "71b830c4-cef2-410b-afc8-9c6f504a3c02",
        "faulty": true,
        "fmri": "dev:////pci@af,0/pci1022,1483@3,5/pci1458,0@0",
        "invisible": false,
        "unusable": false,
        "uuid": "01f332af-ed19-42cf-a623-3b5767b513f7"
      },
      {
        "case_id": "359346d5-c134-c44c-b0fa-db4a08a292d4",
        "faulty": true,
        "fmri": "sw:///:path=/var/crash/atrium/.359346d5-c134-c44c-b0fa-db4a08a292d4",
        "invisible": false,
        "unusable": false,
        "uuid": "4206a805-00e3-cb06-bf1a-8bf69f8c8be1"
      },
      {
        "case_id": "8fbb2f00-47e0-ef18-b56d-d5475cae27f2",
        "faulty": true,
        "fmri": "sw:///:path=/var/crash/atrium/.8fbb2f00-47e0-ef18-b56d-d5475cae27f2",
        "invisible": false,
        "unusable": false,
        "uuid": "9a8e0d9a-a68c-6578-9d41-dacd39a4a819"
      },
      {
        "case_id": "934d446d-d1db-4f12-88f2-eadd1d0cae22",
        "faulty": true,
        "fmri": "sw:///:path=/var/crash/atrium/.934d446d-d1db-4f12-88f2-eadd1d0cae22",
        "invisible": false,
        "unusable": false,
        "uuid": "607b8a84-4e32-44ad-97e6-b595faffdd1d"
      },
      {
        "case_id": "f389ce27-4486-e994-9c34-c5836914f27f",
        "faulty": true,
        "fmri": "sw:///:path=/var/crash/atrium/.f389ce27-4486-e994-9c34-c5836914f27f",
        "invisible": false,
        "unusable": false,
        "uuid": "79411ef0-9ffc-e7f3-b9d6-9aa908881603"
      }
    ]
  }
}

@smklein smklein force-pushed the add-fmd-inventory branch 4 times, most recently from df22bbb to aaa6509 Compare April 17, 2026 20:05
Comment thread rpaths/src/lib.rs
/// Currently, we only do this for libpq ("pq-sys" package), but this pattern
/// could be generalized for other native libraries.
pub static RPATH_ENV_VARS: &'static [&'static str] = &["DEP_PQ_LIBDIRS"];
/// We scan all of these on every build.rs call. Only env vars that are
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file is worth a careful review.

We're adding a dependency on fmd-adm - which uses fmd-adm-sys and links against a shared library which we expect to exist on our helios deployments. However, it's not in a stable location, so we need to provide some path info about how to find it.

Within fmd-adm-sys, we expose LIBDIRS metadata that gets picked up here. This is similar to the mechanism used by pq-sys, to configure path information for anyone linking against it.

However, the pre-existing way of configuring rpaths expected pq-sys to be the only native library we'd link against. With the addition of fmd-adm-sys, it's more complicated. We might want one. We might want the other. We might want both.

BEFORE this PR, this behavior was:

"Iterate over RPATH_ENV_VARS. If anything is missing an environment variable, panic. Otherwise, configure rpath based on the value of that environment variable"

IN THIS PR, I went with the option:

"Iterate over RPATH_ENV_VARS. If anything is missing an environment variable, return. Otherwise, configure rpath based on the value of that environment variable"

If we want to, I could change this to:

"explicitly take a list of dependencies, panic if any of them are missing"

That would basically be like "bring-your-own RPATH_ENV_VARS".

I didn't do this, because it seemed a little redundant to "specify the direct dependency, depend on omicron_rpaths, then also specify the transitive dependencies you collected", when those transitive dependencies can be inferred. However, if this seems too "fast and loose", we can push this work back onto the caller. But I'll probably need to change all the callsites to configure_default_omicron_rpaths to make this option work.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does the error in the event of a missing env var look like, after this change? Does something else fail in a way that makes it clear what happened?

@smklein smklein force-pushed the add-fmd-inventory branch 2 times, most recently from dad43a7 to 84a6975 Compare April 17, 2026 21:07
Exposes illumos Fault Management Daemon (FMD) data through the
sled-agent inventory endpoint. This lets the control plane see
diagnosed hardware/software faults on each sled.

New API version 35 adds an `fmd: Option<FmdInventory>` field to
`Inventory`. When present, it contains:
- Cases: diagnosed faults with UUID, diagnostic code, URL, and the
  full event nvlist serialized as JSON
- Resources: affected components with FMRI, fault status flags

On illumos, sled-agent queries FMD on each inventory request. On
non-illumos (sim, tests), the field is None. Database storage is
not included — that's a follow-up.
@smklein smklein force-pushed the add-fmd-inventory branch from 84a6975 to 322d5b3 Compare April 17, 2026 21:13
Comment thread cockroach-admin/Cargo.toml
Comment thread dev-tools/omdb/Cargo.toml
Comment thread dev-tools/omicron-dev/Cargo.toml
Comment thread end-to-end-tests/Cargo.toml
Comment thread nexus/db-queries/Cargo.toml
Comment thread nexus/reconfigurator/execution/Cargo.toml
Comment thread nexus/saga-recovery/Cargo.toml
Comment thread nexus/test-utils/Cargo.toml
Comment thread nexus/Cargo.toml
Comment thread ntp-admin/Cargo.toml
The inventory endpoint is async, but FMD queries go through door calls
to fmd(1M) that can stall the calling thread. Move the work onto
spawn_blocking so it doesn't occupy a Tokio worker; surface any
JoinError as FmdInventory::Error.
@smklein smklein force-pushed the add-fmd-inventory branch from 3715de3 to 1fba06e Compare April 17, 2026 22:05
smklein added 2 commits April 17, 2026 15:09
The FmdCase.url docstring now uses backticks instead of quotes around the
example URL, which changes the schema description and thus the spec hash.
The verify-libraries xtask checks that binaries don't link against
unexpected libraries. Add libfmd_adm.so.1 to the allowlist for the
binaries that legitimately need it (sled-agent, sled-agent-sim, and
omicron-dev which spins up sled-agent for tests).
@smklein smklein force-pushed the add-fmd-inventory branch from 1fba06e to 887f61a Compare April 17, 2026 22:09
Comment thread sled-agent/types/versions/src/add_fmd_to_inventory/inventory.rs Outdated
Comment thread sled-agent/types/versions/src/add_fmd_to_inventory/inventory.rs Outdated
Comment thread sled-agent/types/versions/src/add_fmd_to_inventory/inventory.rs Outdated
Comment thread sled-agent/types/versions/src/add_fmd_to_inventory/inventory.rs Outdated
Comment thread sled-agent/types/versions/src/add_fmd_to_inventory/inventory.rs Outdated
Comment thread sled-agent/src/fmd.rs Outdated
Comment thread sled-agent/src/fmd.rs
Comment thread sled-agent/src/fmd.rs Outdated
Comment thread sled-agent/src/fmd.rs Outdated
Comment thread sled-agent/src/fmd.rs Outdated
smklein added 5 commits April 27, 2026 17:18
oxidecomputer/fmd-adm#2 replaced the bool argument on
`FmdAdm::resources()` with an `InvisibleResources` enum to make
callsites self-documenting (per hawk's review feedback). Bump to the
post-merge rev and update the call in sled-agent.
The Err arm was dead code: omicron compiles with panic="abort", so a
panic inside the blocking task aborts the process before the JoinHandle
can return Err. Switch to .expect with a descriptive message — if the
invariant ever changes, the panic will say what happened.

Addresses the take-it-or-leave-it nit at #10283
(comment r3112399887).
The illumos module is cfg-gated, so cargo check on Linux skips it
entirely — the missing trait import wasn't visible locally. CI on helios
caught it: three E0599 errors for from_untyped_uuid on FmdHostCaseUuid
and FmdResourceUuid. Verified fix on atrium.
@smklein smklein marked this pull request as ready for review April 29, 2026 00:20
@smklein
Copy link
Copy Markdown
Collaborator Author

smklein commented Apr 29, 2026

This change should be ready for re-review now!

Comment thread rpaths/src/lib.rs
/// Currently, we only do this for libpq ("pq-sys" package), but this pattern
/// could be generalized for other native libraries.
pub static RPATH_ENV_VARS: &'static [&'static str] = &["DEP_PQ_LIBDIRS"];
/// We scan all of these on every build.rs call. Only env vars that are
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does the error in the event of a missing env var look like, after this change? Does something else fail in a way that makes it clear what happened?

Comment on lines +78 to +86
/// Result of querying FMD for fault information.
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, JsonSchema)]
#[serde(tag = "type", content = "value", rename_all = "snake_case")]
pub enum FmdInventoryResult {
/// FMD data was successfully collected.
Available(FmdInventory),
/// FMD data collection failed or is not available on this platform.
Error { error: String },
}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that this is now just an enum of Available and Error...why not just represent it as a normal Rust Result<FmdInventory, String> instead of reinventing Result?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(we could also make it a Result<FmdInventory, omicron_common::api::external::Error>, probably?)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants