From 3ba7d185728d43ec211ff1bb7ae29da0d1ce12da Mon Sep 17 00:00:00 2001 From: Paul Hinze Date: Thu, 14 May 2026 15:00:08 -0500 Subject: [PATCH] Address CodeRabbit feedback on MIR-1018 1. server.go: parse cfg.TLS.AdditionalIPs into the IP set BEFORE coordinate.SetupEtcdTLS() so the etcd server cert SANs include user-configured addresses. Without this, distributed runners reaching the cluster over an explicit AdditionalIP would fail TLS verification against etcd. 2. coordinate.PublicIPs(): route through ComputeAdvertise so the AutocertController DNS sanity check honors per-family netcheck state and the CGNAT filter. Previously this path could leak the netcheck source IP even when its family had zero reachable ports, and the fallback admitted 100.64.0.0/10 addresses through the bare IsGlobalUnicast/IsPrivate check. 3. debug_advertise: warn-and-continue on ipdiscovery failure instead of aborting, mirroring server.go. The whole point of the command is to diagnose discovery failure modes, so it should still exercise the explicit-IP and netcheck paths when interface discovery itself misbehaves. 4. debug_advertise: add --format json via the FormatOptions pattern so the command produces machine-parseable output for tooling/automation. Includes regenerated docs for the new flag. Includes regenerated command docs for debug advertise. --- cli/commands/debug_advertise.go | 75 ++++++++++++++++++++++------ cli/commands/server.go | 21 ++++---- components/coordinate/coordinate.go | 55 +++++++++----------- docs/docs/command/debug-advertise.md | 2 + 4 files changed, 97 insertions(+), 56 deletions(-) diff --git a/cli/commands/debug_advertise.go b/cli/commands/debug_advertise.go index 9257a3dc..519126c7 100644 --- a/cli/commands/debug_advertise.go +++ b/cli/commands/debug_advertise.go @@ -16,6 +16,7 @@ import ( // the final advertised list, so we can debug cases where the server // advertises addresses that aren't actually reachable from clients. func DebugAdvertise(ctx *Context, opts struct { + FormatOptions CloudURL string `long:"cloud-url" description:"Cloud URL to use for netcheck (default: https://api.miren.cloud)"` SkipNetcheck bool `long:"skip-netcheck" description:"Skip the netcheck call and only report interface scan"` AdditionalIPs []string `long:"additional-ip" description:"Simulate a server-configured AdditionalIP (repeatable)"` @@ -30,22 +31,34 @@ func DebugAdvertise(ctx *Context, opts struct { listenAddr = "0.0.0.0:8443" } - ctx.Info("debug advertise — reproducing server advertisement logic") - ctx.Info(" cloud URL: %s", cloudURL) - ctx.Info(" listen: %s", listenAddr) - ctx.Info(" netcheck: %s", boolWord(!opts.SkipNetcheck, "enabled", "skipped")) - ctx.Info("") + // JSON output silences the human-oriented progress logs so the resulting + // document is the only thing on stdout. + humanInfo := func(format string, args ...any) { + if opts.IsJSON() { + return + } + ctx.Info(format, args...) + } + + humanInfo("debug advertise — reproducing server advertisement logic") + humanInfo(" cloud URL: %s", cloudURL) + humanInfo(" listen: %s", listenAddr) + humanInfo(" netcheck: %s", boolWord(!opts.SkipNetcheck, "enabled", "skipped")) + humanInfo("") discoveryOpts := ipdiscovery.Options{} if !opts.SkipNetcheck { discoveryOpts.NetcheckURL = cloudURL } - ctx.Info("Step 1: interface scan") + humanInfo("Step 1: interface scan") discovery, err := ipdiscovery.DiscoverWithTimeout(15*time.Second, ctx.Log, discoveryOpts) if err != nil { + // Match server.go: warn and keep going so the command can still + // exercise the explicit-IP and netcheck paths it's designed to + // diagnose, even when interface discovery itself misbehaves. ctx.Warn("ipdiscovery.Discover failed: %v", err) - return err + discovery = &ipdiscovery.Discovery{} } ipSet := coordinate.NewIPSet() @@ -57,10 +70,10 @@ func DebugAdvertise(ctx *Context, opts struct { // server.go drops link-local addresses before handing the list // to the coordinator — mirror that here. if ip.IsLinkLocalUnicast() { - ctx.Info(" %-15s %-40s [skipped: link-local]", a.Interface, a.IP) + humanInfo(" %-15s %-40s [skipped: link-local]", a.Interface, a.IP) continue } - ctx.Info(" %-15s %-40s (discovered)", a.Interface, a.IP) + humanInfo(" %-15s %-40s (discovered)", a.Interface, a.IP) ipSet.AddDiscovered(ip) } @@ -70,15 +83,15 @@ func DebugAdvertise(ctx *Context, opts struct { ctx.Warn("--additional-ip %q is not a valid IP, skipping", s) continue } - ctx.Info(" %-15s %-40s (explicit)", "user", ip.String()) + humanInfo(" %-15s %-40s (explicit)", "user", ip.String()) ipSet.AddExplicit(ip) } - ctx.Info("") + humanInfo("") - ctx.Info("Step 2: dual-stack netcheck") + humanInfo("Step 2: dual-stack netcheck") var netcheckResult *cloudauth.NetcheckDualStackResult if opts.SkipNetcheck { - ctx.Info(" skipped (--skip-netcheck)") + humanInfo(" skipped (--skip-netcheck)") } else { ports := []cloudauth.NetcheckPort{ {Port: 8443, Protocol: "https"}, @@ -88,12 +101,12 @@ func DebugAdvertise(ctx *Context, opts struct { if err != nil { ctx.Warn("netcheck failed: %v", err) netcheckResult = nil - } else { + } else if !opts.IsJSON() { printNetcheckResponse(ctx, "IPv4", netcheckResult.IPv4) printNetcheckResponse(ctx, "IPv6", netcheckResult.IPv6) } } - ctx.Info("") + humanInfo("") candidates, final := coordinate.ComputeAdvertise(coordinate.AdvertiseInput{ ListenAddr: listenAddr, @@ -101,6 +114,38 @@ func DebugAdvertise(ctx *Context, opts struct { Netcheck: netcheckResult, }) + if opts.IsJSON() { + type candidateJSON struct { + Source string `json:"source"` + HostPort string `json:"host_port"` + IP string `json:"ip,omitempty"` + Classification string `json:"classification,omitempty"` + Included bool `json:"included"` + Reason string `json:"reason"` + } + type output struct { + Candidates []candidateJSON `json:"candidates"` + Advertised []string `json:"advertised"` + } + out := output{ + Candidates: make([]candidateJSON, len(candidates)), + Advertised: final, + } + for i, c := range candidates { + out.Candidates[i] = candidateJSON{ + Source: c.Source, + HostPort: c.HostPort, + Classification: c.Classification, + Included: c.Included, + Reason: c.Reason, + } + if c.IP != nil { + out.Candidates[i].IP = c.IP.String() + } + } + return PrintJSON(out) + } + ctx.Info("Step 3: per-candidate classification and inclusion decision") ctx.Info("") ctx.Info(" %-12s %-40s %-16s %-10s %s", "SOURCE", "IP:PORT", "CLASS", "DECISION", "REASON") diff --git a/cli/commands/server.go b/cli/commands/server.go index 9c970803..11ac73ce 100644 --- a/cli/commands/server.go +++ b/cli/commands/server.go @@ -98,6 +98,18 @@ func Server(ctx *Context, opts serverconfig.CLIFlags) error { ctx.Log.Info("discovered IPs", "addresses", len(discovery.Addresses)) } + // Parse explicit AdditionalIPs up front so they're included in TLS cert + // SANs everywhere — both the API cert and the etcd cert (used by + // distributed runners), the latter of which is built earlier in startup. + for _, ip := range cfg.TLS.AdditionalIPs { + addr := net.ParseIP(ip) + if addr == nil { + ctx.Log.Error("failed to parse additional IP", "ip", ip) + return fmt.Errorf("failed to parse additional IP %s", ip) + } + ipSet.AddExplicit(addr) + } + switch cfg.GetMode() { case "standalone": // Mode defaults are already applied by serverconfig.Load @@ -520,15 +532,6 @@ func Server(ctx *Context, opts serverconfig.CLIFlags) error { systemHandler := observability.NewSystemLogHandler(ctx.Log.Handler(), batchWriter) ctx.Log = slog.New(systemHandler) - for _, ip := range cfg.TLS.AdditionalIPs { - addr := net.ParseIP(ip) - if addr == nil { - ctx.Log.Error("failed to parse additional IP", "ip", ip) - return fmt.Errorf("failed to parse additional IP %s", ip) - } - ipSet.AddExplicit(addr) - } - // Create HTTP metrics httpMetrics := metrics.NewHTTPMetrics(ctx.Log, ctx.ServerState.Writer, ctx.ServerState.Reader) ctx.ServerState.HTTPMetrics = httpMetrics diff --git a/components/coordinate/coordinate.go b/components/coordinate/coordinate.go index 92fe1067..709fde0a 100644 --- a/components/coordinate/coordinate.go +++ b/components/coordinate/coordinate.go @@ -1285,47 +1285,38 @@ func (c *Coordinator) runNetcheck(ctx context.Context) { } } -// PublicIPs returns the cluster's known public IP addresses from netcheck, -// falling back to user-provided AdditionalIPs and auto-discovered IPs -// (filtered to global unicast, non-private) if netcheck hasn't run yet. +// PublicIPs returns the cluster's known public IP addresses, applying the +// same filtering rules as the advertised API addresses. Routes through +// ComputeAdvertise so the AutocertController's DNS sanity check honors +// per-family netcheck state (no leaking the source IP when its family has +// zero reachable ports) and the CGNAT filter (no advertising tailnet +// addresses as "public"). func (c *Coordinator) PublicIPs() []net.IP { c.netcheckMu.RLock() - result := c.netcheckResult + netcheck := c.netcheckResult c.netcheckMu.RUnlock() + cands, _ := ComputeAdvertise(AdvertiseInput{ + IPs: c.IPs.All(), + Netcheck: netcheck, + }) + seen := make(map[string]struct{}) var ips []net.IP - - if result != nil { - for _, resp := range []*cloudauth.NetcheckResponse{result.IPv4, result.IPv6} { - if resp == nil || resp.SourceAddress == "" { - continue - } - ip := net.ParseIP(resp.SourceAddress) - if ip == nil { - continue - } - if _, ok := seen[resp.SourceAddress]; !ok { - seen[resp.SourceAddress] = struct{}{} - ips = append(ips, ip) - } + for _, cand := range cands { + if !cand.Included || cand.IP == nil { + continue } - } - - if len(ips) == 0 { - for _, sip := range c.IPs.All() { - ip := sip.IP - if ip == nil || !ip.IsGlobalUnicast() || ip.IsPrivate() { - continue - } - s := ip.String() - if _, ok := seen[s]; !ok { - seen[s] = struct{}{} - ips = append(ips, ip) - } + if cand.Classification != "global-unicast" { + continue + } + s := cand.IP.String() + if _, ok := seen[s]; ok { + continue } + seen[s] = struct{}{} + ips = append(ips, cand.IP) } - return ips } diff --git a/docs/docs/command/debug-advertise.md b/docs/docs/command/debug-advertise.md index 027e9fb0..7e6d7595 100644 --- a/docs/docs/command/debug-advertise.md +++ b/docs/docs/command/debug-advertise.md @@ -18,6 +18,8 @@ miren debug advertise [flags] - `--additional-ip` — Simulate a server-configured AdditionalIP (repeatable) - `--cloud-url` — Cloud URL to use for netcheck (default: https://api.miren.cloud) +- `--format` — Output format (text, json) (default: `text`) +- `--json` — Shorthand for --format json - `--listen` — Simulate the server's listen address (default: 0.0.0.0:8443) - `--skip-netcheck` — Skip the netcheck call and only report interface scan