diff --git a/controlplane/telemetry/cmd/geoprobe-agent/main.go b/controlplane/telemetry/cmd/geoprobe-agent/main.go index a558182f20..c1bad62495 100644 --- a/controlplane/telemetry/cmd/geoprobe-agent/main.go +++ b/controlplane/telemetry/cmd/geoprobe-agent/main.go @@ -22,10 +22,10 @@ import ( solanarpc "github.com/gagliardetto/solana-go/rpc" "github.com/malbeclabs/doublezero/config" "github.com/malbeclabs/doublezero/controlplane/telemetry/internal/geoprobe" - "github.com/malbeclabs/doublezero/controlplane/telemetry/internal/metrics" geolocation "github.com/malbeclabs/doublezero/sdk/geolocation/go" twamplight "github.com/malbeclabs/doublezero/tools/twamp/pkg/light" "github.com/malbeclabs/doublezero/tools/twamp/pkg/signed" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" ) @@ -337,9 +337,11 @@ func main() { "geoprobe_pubkey", geoProbePubkey, ) - // Set up prometheus metrics server if enabled. + // Set up prometheus metrics. + m := geoprobe.NewMetrics(geoprobe.SourceGeoProbeAgent, geoProbePubkey.String(), prometheus.DefaultRegisterer) + if *metricsEnable { - metrics.GeoProbeBuildInfo.WithLabelValues(version, commit, date).Set(1) + m.BuildInfo.WithLabelValues(version, commit, date).Set(1) go func() { listener, err := net.Listen("tcp", *metricsAddr) if err != nil { @@ -491,7 +493,7 @@ func main() { // Run UDP offset listener. go func() { - runOffsetListener(ctx, log, offsetListener, cache, pState, signedReflector) + runOffsetListener(ctx, log, offsetListener, cache, pState, signedReflector, m) }() // Run eviction goroutine. @@ -552,7 +554,7 @@ func main() { return case update := <-parentUpdateCh: pState.update(update.Authorities) - metrics.GeoProbeParentsDiscovered.Set(float64(len(update.Authorities))) + m.ParentsDiscovered.Set(float64(len(update.Authorities))) log.Info("Updated parent authorities from discovery", "totalParents", len(update.Authorities)) } @@ -585,12 +587,12 @@ func main() { if pd != nil { start := time.Now() pd.Tick(ctx, parentUpdateCh) - metrics.GeoProbeParentDiscoveryDuration.Observe(time.Since(start).Seconds()) + m.ParentDiscoveryDuration.Observe(time.Since(start).Seconds()) } if td != nil { start := time.Now() td.Tick(ctx, targetUpdateCh, inboundKeyCh, icmpTargetUpdateCh) - metrics.GeoProbeTargetDiscoveryDuration.Observe(time.Since(start).Seconds()) + m.TargetDiscoveryDuration.Observe(time.Since(start).Seconds()) } } @@ -621,6 +623,7 @@ func main() { senderConn: senderConn, getCurrentSlot: getCurrentSlot, signedReflector: signedReflector, + metrics: m, targetUpdateCh: targetUpdateCh, icmpTargetUpdateCh: icmpTargetUpdateCh, inboundKeyCh: inboundKeyCh, @@ -647,6 +650,7 @@ func runOffsetListener( cache *offsetCache, parents *parentState, signedReflector signed.Reflector, + m *geoprobe.Metrics, ) { log.Info("Starting offset listener", "addr", conn.LocalAddr().String()) @@ -672,7 +676,7 @@ func runOffsetListener( return } log.Warn("Failed to receive offset", "error", err) - metrics.GeoProbeErrors.WithLabelValues(metrics.GeoProbeErrorTypeOffsetReceive).Inc() + m.Errors.WithLabelValues(geoprobe.ErrorTypeOffsetReceive).Inc() continue } @@ -685,7 +689,7 @@ func runOffsetListener( expectedAuthority, knownParent := parents.getAuthority(offset.SenderPubkey) if !knownParent { log.Debug("Rejecting offset from unknown parent", "sender_pubkey", senderPK, "addr", addr) - metrics.GeoProbeOffsetsRejected.WithLabelValues(metrics.GeoProbeRejectUnknownParent).Inc() + m.OffsetsRejected.WithLabelValues(geoprobe.RejectUnknownParent).Inc() continue } if expectedAuthority != offset.AuthorityPubkey { @@ -694,14 +698,14 @@ func runOffsetListener( "expected_authority", solana.PublicKeyFromBytes(expectedAuthority[:]).String(), "actual_authority", authorityPK, "addr", addr) - metrics.GeoProbeOffsetsRejected.WithLabelValues(metrics.GeoProbeRejectWrongAuthority).Inc() + m.OffsetsRejected.WithLabelValues(geoprobe.RejectWrongAuthority).Inc() continue } // Verify signature chain (top-level and all references). if err := geoprobe.VerifyOffsetChain(offset); err != nil { log.Warn("Offset signature verification failed", "authority_pubkey", authorityPK, "addr", addr, "error", err) - metrics.GeoProbeOffsetsRejected.WithLabelValues(metrics.GeoProbeRejectInvalidSignature).Inc() + m.OffsetsRejected.WithLabelValues(geoprobe.RejectInvalidSignature).Inc() continue } @@ -709,7 +713,7 @@ func runOffsetListener( cache.Put(offset) signedReflector.SetOffsets(marshalBestOffset(cache)) - metrics.GeoProbeOffsetsReceived.Inc() + m.OffsetsReceived.Inc() log.Debug("Cached DZD offset", "authority_pubkey", authorityPK, @@ -734,6 +738,7 @@ type measurementLoop struct { senderConn *net.UDPConn getCurrentSlot func(ctx context.Context) (uint64, error) signedReflector signed.Reflector + metrics *geoprobe.Metrics targets []geoprobe.ProbeAddress icmpTargets []geoprobe.ProbeAddress @@ -810,10 +815,10 @@ func (ml *measurementLoop) run() error { func(addr geoprobe.ProbeAddress) (uint64, bool) { return ml.pinger.MeasureOne(ml.ctx, addr) }, ) ml.targets = newTargets - metrics.GeoProbeTargetsDiscovered.Set(float64(len(ml.targets))) + ml.metrics.TargetsDiscovered.Set(float64(len(ml.targets))) ml.log.Info("Updated targets from discovery", "totalTargets", len(ml.targets)) if len(rttData) > 0 { - sendCompositeOffsets(ml.ctx, ml.log, rttData, ml.cache, ml.signer, ml.senderConn, ml.getCurrentSlot) + sendCompositeOffsets(ml.ctx, ml.log, rttData, ml.cache, ml.signer, ml.senderConn, ml.getCurrentSlot, ml.metrics) } case icmpUpdate := <-ml.icmpTargetUpdateCh: @@ -825,10 +830,10 @@ func (ml *measurementLoop) run() error { func(addr geoprobe.ProbeAddress) (uint64, bool) { return ml.icmpPinger.MeasureOne(ml.ctx, addr) }, ) ml.icmpTargets = newTargets - metrics.GeoProbeIcmpTargetsDiscovered.Set(float64(len(ml.icmpTargets))) + ml.metrics.IcmpTargetsDiscovered.Set(float64(len(ml.icmpTargets))) ml.log.Info("Updated ICMP targets from discovery", "totalIcmpTargets", len(ml.icmpTargets)) if len(rttData) > 0 { - sendCompositeOffsets(ml.ctx, ml.log, rttData, ml.cache, ml.signer, ml.senderConn, ml.getCurrentSlot) + sendCompositeOffsets(ml.ctx, ml.log, rttData, ml.cache, ml.signer, ml.senderConn, ml.getCurrentSlot, ml.metrics) } case keyUpdate := <-ml.inboundKeyCh: @@ -848,7 +853,7 @@ func (ml *measurementLoop) runCycle() { ml.log.Debug("Starting measurement cycle", "targets", len(ml.targets), "icmpTargets", len(ml.icmpTargets)) start := time.Now() defer func() { - metrics.GeoProbeMeasurementCycleDuration.Observe(time.Since(start).Seconds()) + ml.metrics.MeasurementCycleDuration.Observe(time.Since(start).Seconds()) }() rttData := make(map[geoprobe.ProbeAddress]uint64) @@ -857,7 +862,7 @@ func (ml *measurementLoop) runCycle() { twampResults, err := ml.pinger.MeasureAll(ml.ctx) if err != nil { ml.log.Error("Failed to measure TWAMP targets", "error", err) - metrics.GeoProbeErrors.WithLabelValues(metrics.GeoProbeErrorTypeMeasurementCycle).Inc() + ml.metrics.Errors.WithLabelValues(geoprobe.ErrorTypeMeasurementCycle).Inc() } else { for k, v := range twampResults { rttData[k] = v @@ -868,10 +873,10 @@ func (ml *measurementLoop) runCycle() { if len(ml.icmpTargets) > 0 { icmpStart := time.Now() icmpResults, err := ml.icmpPinger.MeasureAll(ml.ctx) - metrics.GeoProbeIcmpMeasurementCycleDuration.Observe(time.Since(icmpStart).Seconds()) + ml.metrics.IcmpMeasurementCycleDuration.Observe(time.Since(icmpStart).Seconds()) if err != nil { ml.log.Error("Failed to measure ICMP targets", "error", err) - metrics.GeoProbeErrors.WithLabelValues(metrics.GeoProbeErrorTypeIcmpMeasurementCycle).Inc() + ml.metrics.Errors.WithLabelValues(geoprobe.ErrorTypeIcmpMeasurementCycle).Inc() } else { for k, v := range icmpResults { rttData[k] = v @@ -888,7 +893,7 @@ func (ml *measurementLoop) runCycle() { ml.log.Debug("target measurement result", "target", addr.Host, "rtt_ms", float64(rttNs)/1000000.0) } - sent := sendCompositeOffsets(ml.ctx, ml.log, rttData, ml.cache, ml.signer, ml.senderConn, ml.getCurrentSlot) + sent := sendCompositeOffsets(ml.ctx, ml.log, rttData, ml.cache, ml.signer, ml.senderConn, ml.getCurrentSlot, ml.metrics) ml.log.Info("Completed measurement cycle", "measured", len(rttData), @@ -905,6 +910,7 @@ func sendCompositeOffsets( signer *geoprobe.OffsetSigner, senderConn *net.UDPConn, getCurrentSlot func(ctx context.Context) (uint64, error), + m *geoprobe.Metrics, ) int { dzdOffset := cache.GetBest() if dzdOffset == nil { @@ -915,7 +921,7 @@ func sendCompositeOffsets( slot, err := getCurrentSlot(ctx) if err != nil { log.Error("Failed to get current slot", "error", err) - metrics.GeoProbeErrors.WithLabelValues(metrics.GeoProbeErrorTypeSlotFetch).Inc() + m.Errors.WithLabelValues(geoprobe.ErrorTypeSlotFetch).Inc() return 0 } @@ -937,19 +943,19 @@ func sendCompositeOffsets( if err := signer.SignOffset(&compositeOffset); err != nil { log.Error("Failed to sign composite offset", "target", addr, "error", err) - metrics.GeoProbeErrors.WithLabelValues(metrics.GeoProbeErrorTypeSignOffset).Inc() + m.Errors.WithLabelValues(geoprobe.ErrorTypeSignOffset).Inc() continue } targetAddr := &net.UDPAddr{IP: net.ParseIP(addr.Host), Port: int(addr.Port)} if err := geoprobe.SendOffset(senderConn, targetAddr, &compositeOffset); err != nil { log.Error("Failed to send composite offset", "target", addr, "error", err) - metrics.GeoProbeErrors.WithLabelValues(metrics.GeoProbeErrorTypeSendOffset).Inc() + m.Errors.WithLabelValues(geoprobe.ErrorTypeSendOffset).Inc() continue } sentCount++ - metrics.GeoProbeCompositeOffsetsSent.Inc() + m.CompositeOffsetsSent.Inc() log.Debug("Sent composite offset to target", "target", addr, "slot", slot, diff --git a/controlplane/telemetry/internal/geoprobe/metrics.go b/controlplane/telemetry/internal/geoprobe/metrics.go new file mode 100644 index 0000000000..ce5885b0ab --- /dev/null +++ b/controlplane/telemetry/internal/geoprobe/metrics.go @@ -0,0 +1,190 @@ +package geoprobe + +import ( + "github.com/prometheus/client_golang/prometheus" +) + +const ( + // Source constants identify which binary is emitting geoprobe metrics. + SourceGeoProbeAgent = "geoprobe-agent" + + // Metric names. + MetricNameBuildInfo = "doublezero_geoprobe_build_info" + MetricNameErrors = "doublezero_geoprobe_errors_total" + MetricNameParentDiscoveryDuration = "doublezero_geoprobe_parent_discovery_duration_seconds" + MetricNameTargetDiscoveryDuration = "doublezero_geoprobe_target_discovery_duration_seconds" + MetricNameMeasurementCycleDuration = "doublezero_geoprobe_measurement_cycle_duration_seconds" + MetricNameOffsetsReceived = "doublezero_geoprobe_offsets_received_total" + MetricNameOffsetsRejected = "doublezero_geoprobe_offsets_rejected_total" + MetricNameCompositeOffsetsSent = "doublezero_geoprobe_composite_offsets_sent_total" + MetricNameTargetsDiscovered = "doublezero_geoprobe_targets_discovered" + MetricNameParentsDiscovered = "doublezero_geoprobe_parents_discovered" + MetricNameIcmpTargetsDiscovered = "doublezero_geoprobe_icmp_targets_discovered" + MetricNameIcmpMeasurementCycleDuration = "doublezero_geoprobe_icmp_measurement_cycle_duration_seconds" + + // Labels. + LabelSource = "source" + LabelGeoProbePubkey = "geoprobe_pubkey" + LabelVersion = "version" + LabelCommit = "commit" + LabelDate = "date" + LabelErrorType = "error_type" + LabelReason = "reason" + + // Error types. + ErrorTypeMeasurementCycle = "measurement_cycle" + ErrorTypeSlotFetch = "slot_fetch" + ErrorTypeSignOffset = "sign_offset" + ErrorTypeSendOffset = "send_offset" + ErrorTypeOffsetReceive = "offset_receive" + ErrorTypeIcmpMeasurementCycle = "icmp_measurement_cycle" + + // Offset rejection reasons. + RejectUnknownParent = "unknown_parent" + RejectWrongAuthority = "wrong_authority" + RejectInvalidSignature = "invalid_signature" +) + +// discoveryBuckets covers RPC-heavy discovery operations which commonly +// take 1-30s depending on network conditions and validator load. +var discoveryBuckets = []float64{0.1, 0.25, 0.5, 1, 2.5, 5, 10, 15, 30, 60} + +// measurementBuckets covers full measurement cycles which include TWAMP +// probes across multiple targets and can take 30s+. +var measurementBuckets = []float64{0.5, 1, 2.5, 5, 10, 15, 30, 60, 120} + +// Metrics holds all Prometheus collectors for the geoprobe subsystem. +type Metrics struct { + BuildInfo *prometheus.GaugeVec + Errors *prometheus.CounterVec + ParentDiscoveryDuration prometheus.Histogram + TargetDiscoveryDuration prometheus.Histogram + MeasurementCycleDuration prometheus.Histogram + OffsetsReceived prometheus.Counter + OffsetsRejected *prometheus.CounterVec + CompositeOffsetsSent prometheus.Counter + TargetsDiscovered prometheus.Gauge + ParentsDiscovered prometheus.Gauge + IcmpTargetsDiscovered prometheus.Gauge + IcmpMeasurementCycleDuration prometheus.Histogram +} + +// NewMetrics creates and registers all geoprobe Prometheus collectors. +// The source and geoProbePubkey values are applied as constant labels on every metric. +func NewMetrics(source, geoProbePubkey string, reg prometheus.Registerer) *Metrics { + constLabels := prometheus.Labels{ + LabelSource: source, + LabelGeoProbePubkey: geoProbePubkey, + } + + m := &Metrics{ + BuildInfo: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: MetricNameBuildInfo, + Help: "Build information of the geoprobe agent", + ConstLabels: constLabels, + }, + []string{LabelVersion, LabelCommit, LabelDate}, + ), + Errors: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: MetricNameErrors, + Help: "Number of errors encountered by the geoprobe agent", + ConstLabels: constLabels, + }, + []string{LabelErrorType}, + ), + ParentDiscoveryDuration: prometheus.NewHistogram( + prometheus.HistogramOpts{ + Name: MetricNameParentDiscoveryDuration, + Help: "Duration of parent discovery ticks in seconds", + Buckets: discoveryBuckets, + ConstLabels: constLabels, + }, + ), + TargetDiscoveryDuration: prometheus.NewHistogram( + prometheus.HistogramOpts{ + Name: MetricNameTargetDiscoveryDuration, + Help: "Duration of target discovery ticks in seconds", + Buckets: discoveryBuckets, + ConstLabels: constLabels, + }, + ), + MeasurementCycleDuration: prometheus.NewHistogram( + prometheus.HistogramOpts{ + Name: MetricNameMeasurementCycleDuration, + Help: "Duration of a full measurement cycle in seconds", + Buckets: measurementBuckets, + ConstLabels: constLabels, + }, + ), + OffsetsReceived: prometheus.NewCounter( + prometheus.CounterOpts{ + Name: MetricNameOffsetsReceived, + Help: "Total DZD offsets received and cached", + ConstLabels: constLabels, + }, + ), + OffsetsRejected: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: MetricNameOffsetsRejected, + Help: "Total DZD offsets rejected", + ConstLabels: constLabels, + }, + []string{LabelReason}, + ), + CompositeOffsetsSent: prometheus.NewCounter( + prometheus.CounterOpts{ + Name: MetricNameCompositeOffsetsSent, + Help: "Total composite offsets sent to targets", + ConstLabels: constLabels, + }, + ), + TargetsDiscovered: prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: MetricNameTargetsDiscovered, + Help: "Current number of discovered targets", + ConstLabels: constLabels, + }, + ), + ParentsDiscovered: prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: MetricNameParentsDiscovered, + Help: "Current number of discovered parents", + ConstLabels: constLabels, + }, + ), + IcmpTargetsDiscovered: prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: MetricNameIcmpTargetsDiscovered, + Help: "Current number of discovered ICMP targets", + ConstLabels: constLabels, + }, + ), + IcmpMeasurementCycleDuration: prometheus.NewHistogram( + prometheus.HistogramOpts{ + Name: MetricNameIcmpMeasurementCycleDuration, + Help: "Duration of ICMP measurement cycles in seconds", + Buckets: measurementBuckets, + ConstLabels: constLabels, + }, + ), + } + + reg.MustRegister( + m.BuildInfo, + m.Errors, + m.ParentDiscoveryDuration, + m.TargetDiscoveryDuration, + m.MeasurementCycleDuration, + m.OffsetsReceived, + m.OffsetsRejected, + m.CompositeOffsetsSent, + m.TargetsDiscovered, + m.ParentsDiscovered, + m.IcmpTargetsDiscovered, + m.IcmpMeasurementCycleDuration, + ) + + return m +} diff --git a/controlplane/telemetry/internal/geoprobe/metrics_test.go b/controlplane/telemetry/internal/geoprobe/metrics_test.go new file mode 100644 index 0000000000..959fd85235 --- /dev/null +++ b/controlplane/telemetry/internal/geoprobe/metrics_test.go @@ -0,0 +1,144 @@ +package geoprobe + +import ( + "testing" + + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" +) + +func TestNewMetrics(t *testing.T) { + reg := prometheus.NewRegistry() + m := NewMetrics(SourceGeoProbeAgent, "DevPK123", reg) + + if m.BuildInfo == nil { + t.Fatal("BuildInfo is nil") + } + if m.Errors == nil { + t.Fatal("Errors is nil") + } + if m.ParentDiscoveryDuration == nil { + t.Fatal("ParentDiscoveryDuration is nil") + } + if m.TargetDiscoveryDuration == nil { + t.Fatal("TargetDiscoveryDuration is nil") + } + if m.MeasurementCycleDuration == nil { + t.Fatal("MeasurementCycleDuration is nil") + } + if m.OffsetsReceived == nil { + t.Fatal("OffsetsReceived is nil") + } + if m.OffsetsRejected == nil { + t.Fatal("OffsetsRejected is nil") + } + if m.CompositeOffsetsSent == nil { + t.Fatal("CompositeOffsetsSent is nil") + } + if m.TargetsDiscovered == nil { + t.Fatal("TargetsDiscovered is nil") + } + if m.ParentsDiscovered == nil { + t.Fatal("ParentsDiscovered is nil") + } + if m.IcmpTargetsDiscovered == nil { + t.Fatal("IcmpTargetsDiscovered is nil") + } + if m.IcmpMeasurementCycleDuration == nil { + t.Fatal("IcmpMeasurementCycleDuration is nil") + } +} + +func TestNewMetrics_ConstantLabels(t *testing.T) { + reg := prometheus.NewRegistry() + m := NewMetrics(SourceGeoProbeAgent, "DevPK456", reg) + + // Increment a counter to make it collectible. + m.Errors.WithLabelValues(ErrorTypeMeasurementCycle).Inc() + + metricFamilies, err := reg.Gather() + if err != nil { + t.Fatal("Failed to gather metrics:", err) + } + + found := false + for _, mf := range metricFamilies { + if mf.GetName() == MetricNameErrors { + found = true + metric := mf.GetMetric()[0] + assertLabel(t, metric, LabelSource, SourceGeoProbeAgent) + assertLabel(t, metric, LabelGeoProbePubkey, "DevPK456") + assertLabel(t, metric, LabelErrorType, ErrorTypeMeasurementCycle) + } + } + if !found { + t.Fatal("errors_total metric not found in gathered metrics") + } +} + +func TestNewMetrics_RegistersTwelveCollectors(t *testing.T) { + reg := prometheus.NewRegistry() + m := NewMetrics(SourceGeoProbeAgent, "DevPK789", reg) + + // Touch vec-based metrics so they appear in Gather output. + m.BuildInfo.WithLabelValues("v1", "abc", "today").Set(1) + m.Errors.WithLabelValues(ErrorTypeMeasurementCycle).Inc() + m.OffsetsRejected.WithLabelValues(RejectUnknownParent).Inc() + + metricFamilies, err := reg.Gather() + if err != nil { + t.Fatal("Failed to gather metrics:", err) + } + + expectedNames := map[string]bool{ + MetricNameBuildInfo: false, + MetricNameErrors: false, + MetricNameParentDiscoveryDuration: false, + MetricNameTargetDiscoveryDuration: false, + MetricNameMeasurementCycleDuration: false, + MetricNameOffsetsReceived: false, + MetricNameOffsetsRejected: false, + MetricNameCompositeOffsetsSent: false, + MetricNameTargetsDiscovered: false, + MetricNameParentsDiscovered: false, + MetricNameIcmpTargetsDiscovered: false, + MetricNameIcmpMeasurementCycleDuration: false, + } + + for _, mf := range metricFamilies { + if _, ok := expectedNames[mf.GetName()]; ok { + expectedNames[mf.GetName()] = true + } + } + + for name, found := range expectedNames { + if !found { + t.Errorf("expected metric %q not found in gathered output", name) + } + } +} + +func TestNewMetrics_DuplicateRegistrationPanics(t *testing.T) { + reg := prometheus.NewRegistry() + NewMetrics(SourceGeoProbeAgent, "PK1", reg) + + defer func() { + if r := recover(); r == nil { + t.Fatal("expected panic on duplicate registration") + } + }() + NewMetrics(SourceGeoProbeAgent, "PK1", reg) +} + +func assertLabel(t *testing.T, metric *dto.Metric, name, expectedValue string) { + t.Helper() + for _, lp := range metric.GetLabel() { + if lp.GetName() == name { + if lp.GetValue() != expectedValue { + t.Errorf("label %q: got %q, want %q", name, lp.GetValue(), expectedValue) + } + return + } + } + t.Errorf("label %q not found on metric", name) +} diff --git a/controlplane/telemetry/internal/metrics/geolocation_metrics.go b/controlplane/telemetry/internal/metrics/geolocation_metrics.go deleted file mode 100644 index 5978e712c9..0000000000 --- a/controlplane/telemetry/internal/metrics/geolocation_metrics.go +++ /dev/null @@ -1,139 +0,0 @@ -package metrics - -import ( - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" -) - -const ( - // GeoProbe agent metric names. - GeoProbeMetricNameBuildInfo = "doublezero_device_geoprobe_agent_build_info" - GeoProbeMetricNameErrors = "doublezero_device_geoprobe_agent_errors_total" - GeoProbeMetricNameParentDiscoveryDuration = "doublezero_device_geoprobe_agent_parent_discovery_duration_seconds" - GeoProbeMetricNameTargetDiscoveryDuration = "doublezero_device_geoprobe_agent_target_discovery_duration_seconds" - GeoProbeMetricNameMeasurementCycleDuration = "doublezero_device_geoprobe_agent_measurement_cycle_duration_seconds" - GeoProbeMetricNameOffsetsReceived = "doublezero_device_geoprobe_agent_offsets_received_total" - GeoProbeMetricNameOffsetsRejected = "doublezero_device_geoprobe_agent_offsets_rejected_total" - GeoProbeMetricNameCompositeOffsetsSent = "doublezero_device_geoprobe_agent_composite_offsets_sent_total" - GeoProbeMetricNameTargetsDiscovered = "doublezero_device_geoprobe_agent_targets_discovered" - GeoProbeMetricNameParentsDiscovered = "doublezero_device_geoprobe_agent_parents_discovered" - GeoProbeMetricNameIcmpTargetsDiscovered = "doublezero_device_geoprobe_agent_icmp_targets_discovered" - GeoProbeMetricNameIcmpMeasurementCycleDuration = "doublezero_device_geoprobe_agent_icmp_measurement_cycle_duration_seconds" - - // GeoProbe agent labels. - GeoProbeMetricLabelReason = "reason" - - // GeoProbe agent error types. - GeoProbeErrorTypeMeasurementCycle = "measurement_cycle" - GeoProbeErrorTypeSlotFetch = "slot_fetch" - GeoProbeErrorTypeSignOffset = "sign_offset" - GeoProbeErrorTypeSendOffset = "send_offset" - GeoProbeErrorTypeOffsetReceive = "offset_receive" - GeoProbeErrorTypeIcmpMeasurementCycle = "icmp_measurement_cycle" - - // Offset rejection reasons. - GeoProbeRejectUnknownParent = "unknown_parent" - GeoProbeRejectWrongAuthority = "wrong_authority" - GeoProbeRejectInvalidSignature = "invalid_signature" -) - -// geoProbeDiscoveryBuckets covers RPC-heavy discovery operations which commonly -// take 1-30s depending on network conditions and validator load. -var geoProbeDiscoveryBuckets = []float64{0.1, 0.25, 0.5, 1, 2.5, 5, 10, 15, 30, 60} - -// geoProbeMeasurementBuckets covers full measurement cycles which include TWAMP -// probes across multiple targets and can take 30s+. -var geoProbeMeasurementBuckets = []float64{0.5, 1, 2.5, 5, 10, 15, 30, 60, 120} - -var ( - GeoProbeBuildInfo = promauto.NewGaugeVec( - prometheus.GaugeOpts{ - Name: GeoProbeMetricNameBuildInfo, - Help: "Build information of the geoprobe agent", - }, - []string{LabelVersion, LabelCommit, LabelDate}, - ) - - GeoProbeErrors = promauto.NewCounterVec( - prometheus.CounterOpts{ - Name: GeoProbeMetricNameErrors, - Help: "Number of errors encountered by the geoprobe agent", - }, - []string{LabelErrorType}, - ) - - GeoProbeParentDiscoveryDuration = promauto.NewHistogram( - prometheus.HistogramOpts{ - Name: GeoProbeMetricNameParentDiscoveryDuration, - Help: "Duration of parent discovery ticks in seconds", - Buckets: geoProbeDiscoveryBuckets, - }, - ) - - GeoProbeTargetDiscoveryDuration = promauto.NewHistogram( - prometheus.HistogramOpts{ - Name: GeoProbeMetricNameTargetDiscoveryDuration, - Help: "Duration of target discovery ticks in seconds", - Buckets: geoProbeDiscoveryBuckets, - }, - ) - - GeoProbeMeasurementCycleDuration = promauto.NewHistogram( - prometheus.HistogramOpts{ - Name: GeoProbeMetricNameMeasurementCycleDuration, - Help: "Duration of a full measurement cycle in seconds", - Buckets: geoProbeMeasurementBuckets, - }, - ) - - GeoProbeOffsetsReceived = promauto.NewCounter( - prometheus.CounterOpts{ - Name: GeoProbeMetricNameOffsetsReceived, - Help: "Total DZD offsets received and cached", - }, - ) - - GeoProbeOffsetsRejected = promauto.NewCounterVec( - prometheus.CounterOpts{ - Name: GeoProbeMetricNameOffsetsRejected, - Help: "Total DZD offsets rejected", - }, - []string{GeoProbeMetricLabelReason}, - ) - - GeoProbeCompositeOffsetsSent = promauto.NewCounter( - prometheus.CounterOpts{ - Name: GeoProbeMetricNameCompositeOffsetsSent, - Help: "Total composite offsets sent to targets", - }, - ) - - GeoProbeTargetsDiscovered = promauto.NewGauge( - prometheus.GaugeOpts{ - Name: GeoProbeMetricNameTargetsDiscovered, - Help: "Current number of discovered targets", - }, - ) - - GeoProbeParentsDiscovered = promauto.NewGauge( - prometheus.GaugeOpts{ - Name: GeoProbeMetricNameParentsDiscovered, - Help: "Current number of discovered parents", - }, - ) - - GeoProbeIcmpTargetsDiscovered = promauto.NewGauge( - prometheus.GaugeOpts{ - Name: GeoProbeMetricNameIcmpTargetsDiscovered, - Help: "Current number of discovered ICMP targets", - }, - ) - - GeoProbeIcmpMeasurementCycleDuration = promauto.NewHistogram( - prometheus.HistogramOpts{ - Name: GeoProbeMetricNameIcmpMeasurementCycleDuration, - Help: "Duration of ICMP measurement cycles in seconds", - Buckets: geoProbeMeasurementBuckets, - }, - ) -)