Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,20 @@ All notable changes to this project will be documented in this file.
- CLI
- Remove log noise on resolve route
- Onchain programs
- Removed device and user allowlist functionality, updating the global state, initialization flow, tests, and processors accordingly, and cleaning up unused account checks.
- Serviceability: require DeactivateMulticastGroup to only close multicast group accounts when both `publisher_count` and `subscriber_count` are zero, preventing deletion of groups that still have active publishers or subscribers.
- Deprecated the user suspend status, as it is no longer used.
- Serviceability: enforce that CloseAccountUser instructions verify the target user has no multicast publishers or subscribers (both `publishers` and `subscribers` are empty) before closing, and add regression coverage for this behavior.
- Enhance access pass functionality with new Solana-specific types
- fix(smartcontract): skip resource alloc in ActivateUser if already allocated ([#2800](https://github.com/malbeclabs/doublezero/pull/2800))
- Removed device and user allowlist functionality, updating the global state, initialization flow, tests, and processors accordingly, and cleaning up unused account checks.
- Serviceability: require DeactivateMulticastGroup to only close multicast group accounts when both `publisher_count` and `subscriber_count` are zero, preventing deletion of groups that still have active publishers or subscribers.
- Deprecated the user suspend status, as it is no longer used.
- Serviceability: enforce that CloseAccountUser instructions verify the target user has no multicast publishers or subscribers (both `publishers` and `subscribers` are empty) before closing, and add regression coverage for this behavior.
- Enhance access pass functionality with new Solana-specific types
- Telemetry
- Fix goroutine leak in TWAMP sender — `cleanUpReceived` goroutines now exit on `Close()` instead of living until process shutdown
- CLI
- Enhance delete multicast group command to cascade into deleting AP entry (#2754)
- Added activation check for existing users before subscribing to new groups (#2782)
- Client
- Cache network interface index/name lookups in liveness UDP service to fix high CPU usage caused by per-packet RTM_GETLINK netlink dumps
- Add observability to BGP handleUpdate: log withdrawal/NLRI counts per batch and track processing duration via `doublezero_bgp_handle_update_duration_seconds` histogram
- E2E tests
- The QA alldevices test now skips devices that are not calling the controller

## [v0.8.4](https://github.com/malbeclabs/doublezero/compare/client/v0.8.3...client/v0.8.4) – 2026-01-28

Expand Down
90 changes: 90 additions & 0 deletions e2e/internal/qa/device_assignment.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,100 @@
package qa

import (
"context"
"encoding/json"
"fmt"
"math"
"net"
"net/http"
"net/url"
"os"
"strings"
"time"
)

// GrafanaConfig holds the connection settings used to query a Prometheus
// HTTP API through Grafana.
type GrafanaConfig struct {
// PrometheusURL is the base URL of the Prometheus endpoint; the query path
// "/api/v1/query" is appended to it when a request is built.
PrometheusURL string
// Username is sent as the HTTP Basic Auth user name.
Username string
// APIKey is sent as the HTTP Basic Auth password.
APIKey string
}

// GrafanaConfigFromEnv assembles a GrafanaConfig from the process environment.
//
// It reads GRAFANA_PROMETHEUS_URL, GRAFANA_PROMETHEUS_USER and GRAFANA_API_KEY.
// The URL and the API key are mandatory: if either one is empty the function
// returns nil, which callers can treat as "Grafana is not configured". The
// user name is optional and copied through as-is. One trailing "/" is removed
// from the URL so that path segments can be appended to it directly.
func GrafanaConfigFromEnv() *GrafanaConfig {
	var (
		baseURL  = os.Getenv("GRAFANA_PROMETHEUS_URL")
		username = os.Getenv("GRAFANA_PROMETHEUS_USER")
		key      = os.Getenv("GRAFANA_API_KEY")
	)

	// Both the endpoint and the credential are required to issue a query.
	if len(baseURL) == 0 || len(key) == 0 {
		return nil
	}

	cfg := new(GrafanaConfig)
	cfg.PrometheusURL = strings.TrimSuffix(baseURL, "/")
	cfg.Username = username
	cfg.APIKey = key
	return cfg
}

// GetDevicesWithActiveConfigAgents queries the Prometheus HTTP API described by
// cfg and returns the set of device codes that showed GetConfig activity against
// the controller during the last five minutes.
//
// The returned map contains an entry (with value true) for every non-empty
// "device_code" label in the query result; devices without activity are simply
// absent, so a lookup for them yields false.
//
// An error is returned when cfg is nil, the request cannot be built or sent,
// the server answers with a non-200 status, the body cannot be decoded, or the
// response envelope does not report "success". When the server supplies the
// "errorType"/"error" fields of the Prometheus error envelope, they are
// included in the returned error so the failure can be diagnosed from logs.
//
// The request is bounded by a 10 second timeout layered on top of ctx.
func GetDevicesWithActiveConfigAgents(ctx context.Context, cfg *GrafanaConfig) (map[string]bool, error) {
	if cfg == nil {
		return nil, fmt.Errorf("grafana config is nil")
	}

	// Query for all devices with GetConfig activity in the last 5m.
	query := `sum by (device_code) (increase(controller_grpc_getconfig_requests_total[5m])) > 0`

	// cfg.PrometheusURL is expected to already include the /api/prom prefix,
	// so only /api/v1/query is appended here.
	queryURL := fmt.Sprintf("%s/api/v1/query?query=%s", cfg.PrometheusURL, url.QueryEscape(query))

	ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, queryURL, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}
	// Grafana Cloud Prometheus uses Basic Auth with instance ID and API key.
	req.SetBasicAuth(cfg.Username, cfg.APIKey)

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("failed to query grafana: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Prometheus-compatible APIs describe failures in a JSON envelope.
		// Decoding is best-effort: a body that is not such an envelope just
		// leaves the detail out rather than masking the HTTP status.
		var apiErr struct {
			ErrorType string `json:"errorType"`
			Error     string `json:"error"`
		}
		if decErr := json.NewDecoder(resp.Body).Decode(&apiErr); decErr == nil && apiErr.Error != "" {
			return nil, fmt.Errorf("grafana query failed with status: %d: %s (errorType=%q)",
				resp.StatusCode, apiErr.Error, apiErr.ErrorType)
		}
		return nil, fmt.Errorf("grafana query failed with status: %d", resp.StatusCode)
	}

	var result struct {
		Status    string `json:"status"`
		ErrorType string `json:"errorType"`
		Error     string `json:"error"`
		Data      struct {
			ResultType string `json:"resultType"`
			Result     []struct {
				Metric map[string]string `json:"metric"`
				Value  []any             `json:"value"`
			} `json:"result"`
		} `json:"data"`
	}

	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, fmt.Errorf("failed to decode response: %w", err)
	}

	if result.Status != "success" {
		if result.Error != "" {
			return nil, fmt.Errorf("query returned non-success status: %s: %s (errorType=%q)",
				result.Status, result.Error, result.ErrorType)
		}
		return nil, fmt.Errorf("query returned non-success status: %s", result.Status)
	}

	// Only the label matters: the "> 0" filter in the query already removed
	// series without activity, so the sample value itself is not inspected.
	active := make(map[string]bool, len(result.Data.Result))
	for _, r := range result.Data.Result {
		if deviceCode, ok := r.Metric["device_code"]; ok && deviceCode != "" {
			active[deviceCode] = true
		}
	}

	return active, nil
}

// LatencyThresholdMs is a latency threshold with value 25; the name indicates
// the unit is milliseconds.
// NOTE(review): how the threshold is applied is not visible in this chunk —
// confirm against its users.
const LatencyThresholdMs = 25

// ClientLatencies is a two-level map from string keys to float64 values.
// NOTE(review): presumably client -> device -> latency; the meaning of the
// keys and the unit of the values are not shown in this chunk — confirm.
type ClientLatencies map[string]map[string]float64
Expand Down
19 changes: 19 additions & 0 deletions e2e/qa_alldevices_unicast_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,25 @@ func TestQA_AllDevices_UnicastConnectivity(t *testing.T) {
return false
})

// Filter out devices that are not actively calling the controller
if grafanaCfg := qa.GrafanaConfigFromEnv(); grafanaCfg != nil {
activeDevices, err := qa.GetDevicesWithActiveConfigAgents(ctx, grafanaCfg)
if err != nil {
log.Warn("Failed to query Grafana for active devices, proceeding with all devices", "error", err)
} else {
log.Info("Filtering devices by controller activity", "activeDeviceCount", len(activeDevices))
devices = slices.DeleteFunc(devices, func(d *qa.Device) bool {
if !activeDevices[d.Code] {
log.Info("Skipping device not calling controller", "device", d.Code)
return true
}
return false
})
}
} else {
log.Info("No Grafana config found, including all devices regardless of controller activity")
}

// If devices flag is provided, filter devices to only include those in the list.
if *devicesFlag != "" {
deviceCodes := make(map[string]struct{})
Expand Down
Loading