Skip to content

Commit 65a5d16

Browse files
committed
feat: add process exporter support and configuration to metrics agent
1 parent 46fd7cd commit 65a5d16

File tree

6 files changed

+229
-16
lines changed

6 files changed

+229
-16
lines changed

cmd/start.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ func runAgent(cmd *cobra.Command, args []string) error {
103103
switch exporterCfg.Name {
104104
case "node_exporter":
105105
exp = exporters.NewNodeExporter(exporterCfg.Endpoint, exporterCfg.Timeout)
106+
case "process_exporter":
107+
exp = exporters.NewProcessExporter(exporterCfg.Endpoint, exporterCfg.Timeout)
106108
default:
107109
logger.Warn("Unknown exporter type, skipping", logger.String("name", exporterCfg.Name))
108110
continue

internal/exporters/exporter.go

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,4 @@ type Exporter interface {
1717

1818
// Verify checks if the exporter is accessible (used at startup)
1919
Verify() error
20-
21-
// DefaultEndpoint returns the standard endpoint for this exporter
22-
// Example: "http://localhost:9100/metrics" for node_exporter
23-
DefaultEndpoint() string
24-
25-
// DefaultInterval returns the recommended scrape interval
26-
// Example: 15s for node_exporter, 30s for postgres_exporter
27-
DefaultInterval() time.Duration
2820
}

internal/exporters/node_exporter.go

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,3 @@ func (n *NodeExporter) Verify() error {
8282
logger.Info("node_exporter verified", logger.String("endpoint", n.endpoint))
8383
return nil
8484
}
85-
86-
func (n *NodeExporter) DefaultEndpoint() string {
87-
return "http://localhost:9100/metrics"
88-
}
89-
90-
func (n *NodeExporter) DefaultInterval() time.Duration {
91-
return 15 * time.Second
92-
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
package exporters
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"io"
7+
"net/http"
8+
"time"
9+
)
10+
11+
// ProcessExporter represents a Prometheus process_exporter instance
12+
type ProcessExporter struct {
13+
name string
14+
endpoint string
15+
timeout time.Duration
16+
client *http.Client
17+
}
18+
19+
// NewProcessExporter creates a new ProcessExporter instance
20+
func NewProcessExporter(endpoint string, timeout time.Duration) *ProcessExporter {
21+
// Use defaults if not specified
22+
if endpoint == "" {
23+
endpoint = "http://127.0.0.1:9256/metrics"
24+
}
25+
if timeout == 0 {
26+
timeout = 3 * time.Second
27+
}
28+
29+
return &ProcessExporter{
30+
name: "process_exporter",
31+
endpoint: endpoint,
32+
timeout: timeout,
33+
client: &http.Client{
34+
Timeout: timeout,
35+
},
36+
}
37+
}
38+
39+
// Name returns the exporter name
40+
func (e *ProcessExporter) Name() string {
41+
return e.name
42+
}
43+
44+
// Endpoint returns the metrics endpoint URL
45+
func (e *ProcessExporter) Endpoint() string {
46+
return e.endpoint
47+
}
48+
49+
// Scrape fetches metrics from process_exporter
50+
func (e *ProcessExporter) Scrape(ctx context.Context) ([]byte, error) {
51+
req, err := http.NewRequestWithContext(ctx, "GET", e.endpoint, nil)
52+
if err != nil {
53+
return nil, fmt.Errorf("failed to create request: %w", err)
54+
}
55+
56+
resp, err := e.client.Do(req)
57+
if err != nil {
58+
return nil, fmt.Errorf("HTTP request failed: %w", err)
59+
}
60+
defer resp.Body.Close()
61+
62+
if resp.StatusCode != http.StatusOK {
63+
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
64+
}
65+
66+
data, err := io.ReadAll(resp.Body)
67+
if err != nil {
68+
return nil, fmt.Errorf("failed to read response: %w", err)
69+
}
70+
71+
return data, nil
72+
}
73+
74+
// Verify checks if the exporter is accessible
75+
func (e *ProcessExporter) Verify() error {
76+
ctx, cancel := context.WithTimeout(context.Background(), e.timeout)
77+
defer cancel()
78+
79+
_, err := e.Scrape(ctx)
80+
return err
81+
}
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
package prometheus
2+
3+
import (
4+
"bufio"
5+
"bytes"
6+
"fmt"
7+
"strings"
8+
"time"
9+
)
10+
11+
// ProcessExporterMetricSnapshot represents parsed process metrics from process_exporter
12+
// This captures per-process group metrics (grouped by command name)
13+
type ProcessExporterMetricSnapshot struct {
14+
Timestamp time.Time `json:"timestamp"`
15+
Processes []ProcessMetric `json:"processes"`
16+
}
17+
18+
// ProcessMetric represents metrics for a single process group (e.g., all "nginx" processes)
19+
type ProcessMetric struct {
20+
Name string `json:"name"` // Process name (groupname from process_exporter)
21+
NumProcs int `json:"num_procs"` // Number of processes in this group
22+
CPUSecondsTotal float64 `json:"cpu_seconds_total"` // Total CPU time consumed (counter)
23+
MemoryBytes int64 `json:"memory_bytes"` // Resident memory (RSS) in bytes
24+
}
25+
26+
// ParseProcessExporterMetrics parses Prometheus process_exporter text format
27+
// Extracts per-process group metrics (CPU, memory, count)
28+
//
29+
// Expected metrics from process_exporter:
30+
// - namedprocess_namegroup_num_procs{groupname="nginx"} 4
31+
// - namedprocess_namegroup_cpu_seconds_total{groupname="nginx"} 1234.56
32+
// - namedprocess_namegroup_memory_bytes{groupname="nginx",memtype="resident"} 104857600
33+
func ParseProcessExporterMetrics(data []byte) (*ProcessExporterMetricSnapshot, error) {
34+
snapshot := &ProcessExporterMetricSnapshot{
35+
Timestamp: time.Now().UTC(),
36+
Processes: []ProcessMetric{},
37+
}
38+
39+
scanner := bufio.NewScanner(bytes.NewReader(data))
40+
41+
// Track metrics per process group (groupname)
42+
processMetrics := make(map[string]*ProcessMetric)
43+
44+
for scanner.Scan() {
45+
line := scanner.Text()
46+
47+
// Skip comments and empty lines
48+
if len(line) == 0 || line[0] == '#' {
49+
continue
50+
}
51+
52+
// Parse metric line
53+
if err := parseProcessLine(line, processMetrics); err != nil {
54+
// Log but don't fail on individual parse errors
55+
continue
56+
}
57+
}
58+
59+
if err := scanner.Err(); err != nil {
60+
return nil, fmt.Errorf("scanner error: %w", err)
61+
}
62+
63+
// Convert map to slice
64+
for _, pm := range processMetrics {
65+
// Only include processes that have at least 1 running instance
66+
if pm.NumProcs > 0 {
67+
snapshot.Processes = append(snapshot.Processes, *pm)
68+
}
69+
}
70+
71+
return snapshot, nil
72+
}
73+
74+
func parseProcessLine(line string, processMetrics map[string]*ProcessMetric) error {
75+
// Split metric name and value
76+
parts := strings.Fields(line)
77+
if len(parts) < 2 {
78+
return fmt.Errorf("invalid line format")
79+
}
80+
81+
metricPart := parts[0]
82+
valuePart := parts[1]
83+
84+
// Extract metric name and labels
85+
var metricName string
86+
var labels map[string]string
87+
88+
if idx := strings.Index(metricPart, "{"); idx != -1 {
89+
metricName = metricPart[:idx]
90+
labelsStr := metricPart[idx+1 : len(metricPart)-1] // Remove {}
91+
labels = parseLabels(labelsStr)
92+
} else {
93+
metricName = metricPart
94+
labels = make(map[string]string)
95+
}
96+
97+
value, err := parseValue(valuePart)
98+
if err != nil {
99+
return err
100+
}
101+
102+
// Extract groupname (process name)
103+
groupname, ok := labels["groupname"]
104+
if !ok {
105+
return fmt.Errorf("missing groupname label")
106+
}
107+
108+
// Ensure process metric entry exists
109+
if processMetrics[groupname] == nil {
110+
processMetrics[groupname] = &ProcessMetric{
111+
Name: groupname,
112+
}
113+
}
114+
115+
pm := processMetrics[groupname]
116+
117+
// Parse specific metrics
118+
switch metricName {
119+
case "namedprocess_namegroup_num_procs":
120+
pm.NumProcs = int(value)
121+
122+
case "namedprocess_namegroup_cpu_seconds_total":
123+
pm.CPUSecondsTotal = value
124+
125+
case "namedprocess_namegroup_memory_bytes":
126+
// Only use resident memory (RSS)
127+
memtype, ok := labels["memtype"]
128+
if ok && memtype == "resident" {
129+
pm.MemoryBytes = int64(value)
130+
}
131+
}
132+
133+
return nil
134+
}
135+
136+
// Note: parseLabels() and parseValue() are already defined in node_exporter_parser.go
137+
// They are package-level functions shared across all parsers in the prometheus package

nodepulse.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,15 @@ exporters:
3131
interval: 15s # Optional: Fast scraping for system metrics (falls back to agent.interval if not specified)
3232
timeout: 3s
3333

34+
# Process Exporter - Per-process metrics (CPU, memory by process name)
35+
# NOTE: Requires process_exporter to be installed and running
36+
# See: ansible/roles/process-exporter/ for deployment
37+
- name: process_exporter
38+
enabled: false # Disabled by default - enable after deploying process_exporter
39+
endpoint: "http://localhost:9256/metrics"
40+
interval: 15s # Same as node_exporter for consistency
41+
timeout: 3s
42+
3443
# Example: PostgreSQL Exporter (uncomment to enable)
3544
# - name: postgres_exporter
3645
# enabled: true

0 commit comments

Comments
 (0)