-
Notifications
You must be signed in to change notification settings - Fork 0
/
node_event_handler.go
230 lines (206 loc) · 6.24 KB
/
node_event_handler.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
package scraper
import (
"context"
"fmt"
"log/slog"
"sync"
"time"
"github.com/jackc/pgx/v5/pgtype"
v1 "k8s.io/api/core/v1"
listerv1 "k8s.io/client-go/listers/core/v1"
"github.com/r2k1/pgkube/app/k8s"
"github.com/r2k1/pgkube/app/queries"
)
// PodLister and PodNamespaceLister re-declare the client-go lister interfaces
// under local names so that mocks can be generated for them.
// Note that these are defined types, not `=` aliases.
type (
	PodLister          listerv1.PodLister
	PodNamespaceLister listerv1.PodNamespaceLister
)
// PodCache looks up a pod by its namespace and name.
type PodCache interface {
	// Get returns the pod stored under namespace/name, or an error.
	Get(namespace string, name string) (*v1.Pod, error)
}
// PodCacheK8s wraps a listerv1.PodLister.
// The original client is hard to mock, so this wrapper exposes a smaller,
// easier to use surface (a single Get method).
type PodCacheK8s struct {
	// lister is the underlying pod lister that Get delegates to.
	lister listerv1.PodLister
}
// NewPodCacheK8s returns a PodCacheK8s backed by the given lister.
func NewPodCacheK8s(lister listerv1.PodLister) *PodCacheK8s {
	return &PodCacheK8s{lister: lister}
}
// Get asks the underlying lister for the pod called name in namespace.
// A lookup failure is wrapped and returned together with a nil pod.
func (p *PodCacheK8s) Get(namespace, name string) (*v1.Pod, error) {
	found, err := p.lister.Pods(namespace).Get(name)
	if err == nil {
		return found, nil
	}
	return nil, fmt.Errorf("getting pod from cache: %w", err)
}
// NodeScraper collects pod CPU and memory usage for a single node and
// writes it through queries.
type NodeScraper struct {
	// nodeName is the node whose metrics are requested in Scrape.
	nodeName string
	// k8sClients is used to fetch the node metrics.
	k8sClients k8s.ClientInterface
	// queries performs the CPU and memory upserts.
	queries *queries.Queries
	// prevCPUSecondsTotal holds the cumulative CPU seconds seen on the
	// previous cpuData call; the usage rate is derived from the delta.
	prevCPUSecondsTotal k8s.PodMetric
	// prevCores holds the per-pod core usage computed on the previous
	// cpuData call; it is reused when a sample timestamp has not advanced.
	prevCores k8s.PodMetric
	// mutex is held for the whole of cpuData, which reads and replaces
	// the two prev* maps.
	mutex sync.Mutex
	// cache resolves a pod namespace/name to the pod object (for its UID).
	cache PodCache
}
// NewNodeScrapper builds a NodeScraper for the node called name.
// Both "previous sample" maps start out empty; the mutex is left at its
// ready-to-use zero value.
func NewNodeScrapper(name string, k8sClients k8s.ClientInterface, queries *queries.Queries, cache PodCache) *NodeScraper {
	scraper := &NodeScraper{
		nodeName:   name,
		k8sClients: k8sClients,
		queries:    queries,
		cache:      cache,
	}
	scraper.prevCPUSecondsTotal = make(k8s.PodMetric)
	scraper.prevCores = make(k8s.PodMetric)
	return scraper
}
// Scrape fetches the current metrics for the node and upserts per-pod CPU
// and memory usage.
//
// CPU is written first; if that upsert fails the function returns
// immediately and memory is not attempted. An upsert is skipped entirely
// when there are no rows to write. Every returned error is wrapped with
// the step that failed.
func (s *NodeScraper) Scrape(ctx context.Context) error {
	metrics, err := s.k8sClients.NodeMetrics(ctx, s.nodeName)
	if err != nil {
		// Wrapped like the other failures below so the caller can tell
		// which node and which step produced the error.
		return fmt.Errorf("fetching metrics for node %q: %w", s.nodeName, err)
	}

	cpuData := s.cpuData(metrics.PodCPUUsageSecondsTotal)
	if len(cpuData) > 0 {
		if err := s.queries.UpsertPodUsedCPU(ctx, cpuData); err != nil {
			return fmt.Errorf("upserting pod used cpu: %w", err)
		}
		slog.Debug("updated pod CPU usage", "node", s.nodeName, "count", len(cpuData))
	}

	memoryData := s.memoryData(metrics.PodMemoryWorkingSetBytes)
	if len(memoryData) > 0 {
		if err := s.queries.UpsertPodUsedMemory(ctx, memoryData); err != nil {
			return fmt.Errorf("upserting pod used memory: %w", err)
		}
		slog.Debug("updated pod memory usage", "node", s.nodeName, "count", len(memoryData))
	}
	return nil
}
// cpuData converts cumulative per-pod CPU seconds into average core usage
// rows ready for upserting.
//
// CPU usage is reported as the total seconds consumed by the pod, so the
// average cores used over the last interval is
//
//	(current - previous) / (current timestamp - previous timestamp)
//
// with the timestamps converted from milliseconds to seconds. A pod seen
// for the first time has no previous sample and produces no row. The
// current samples and the computed cores are stored for the next call.
// The method holds s.mutex for its whole duration.
func (s *NodeScraper) cpuData(currentCPUSecondsTotal k8s.PodMetric) []queries.UpsertPodUsedCPUParams {
	s.mutex.Lock()
	defer s.mutex.Unlock()

	podCores := make(k8s.PodMetric, len(currentCPUSecondsTotal))
	for key, value := range currentCPUSecondsTotal {
		prevValue, ok := s.prevCPUSecondsTotal[key]
		if !ok {
			continue
		}
		if prevValue.TimestampMs == value.TimestampMs {
			// Same timestamp as last time: the metrics source has not
			// refreshed the sample yet, so the current usage is unknown.
			// Carry the previously computed value forward, if any.
			if carried, ok := s.prevCores[key]; ok {
				podCores[key] = carried
			}
			continue
		}
		// Convert to float BEFORE dividing by 1000. Dividing the integer
		// millisecond delta first truncates to whole seconds, and any
		// interval shorter than one second collapses to 0, which turns
		// the rate into ±Inf/NaN.
		elapsedSeconds := float64(value.TimestampMs-prevValue.TimestampMs) / 1000
		// NOTE(review): a decreasing counter (e.g. after a restart) would
		// yield a negative rate here — confirm whether that can occur.
		podCores[key] = k8s.MetricValue{
			Value:       (value.Value - prevValue.Value) / elapsedSeconds,
			TimestampMs: value.TimestampMs,
		}
	}

	result := make([]queries.UpsertPodUsedCPUParams, 0, len(podCores))
	for key, value := range podCores {
		pod, err := s.cache.Get(key.Namespace, key.Name)
		if err != nil {
			slog.Error("could not find pod in cache", "namespace", key.Namespace, "name", key.Name, "error", err)
			continue
		}
		pgUUID, err := parsePGUUID(pod.UID)
		if err != nil {
			slog.Error("parsing uuid", "error", err)
			continue
		}
		result = append(result, queries.UpsertPodUsedCPUParams{
			Timestamp: pgtype.Timestamptz{
				Time:  truncateToHour(time.UnixMilli(value.TimestampMs)).UTC(),
				Valid: true,
			},
			PodUid:   pgUUID,
			CpuCores: value.Value,
		})
	}

	// Remember this round's inputs and outputs for the next delta.
	s.prevCPUSecondsTotal = currentCPUSecondsTotal
	s.prevCores = podCores
	return result
}
// memoryData turns per-pod memory samples into rows ready for upserting.
//
// Each sample is matched to its pod through the cache to obtain the pod
// UID. Samples whose pod cannot be fetched, or whose UID cannot be parsed,
// are logged and skipped. The sample timestamp (milliseconds) is passed
// through truncateToHour and converted to UTC.
func (s *NodeScraper) memoryData(currentPodMemoryUsed k8s.PodMetric) []queries.UpsertPodUsedMemoryParams {
	result := make([]queries.UpsertPodUsedMemoryParams, 0, len(currentPodMemoryUsed))
	for key, value := range currentPodMemoryUsed {
		pod, err := s.cache.Get(key.Namespace, key.Name)
		if err != nil {
			// Include the underlying error so the cause of the failed
			// lookup is visible, matching the uuid log below.
			slog.Error("could not find pod in cache", "namespace", key.Namespace, "name", key.Name, "error", err)
			continue
		}
		pgUUID, err := parsePGUUID(pod.UID)
		if err != nil {
			slog.Error("parsing uuid", "error", err)
			continue
		}
		result = append(result, queries.UpsertPodUsedMemoryParams{
			Timestamp: pgtype.Timestamptz{
				Time:  truncateToHour(time.UnixMilli(value.TimestampMs)).UTC(),
				Valid: true,
			},
			PodUid:      pgUUID,
			MemoryBytes: value.Value,
		})
	}
	return result
}
// NodeEventHandler reacts to node add/delete events by registering or
// removing a per-node scrape target on the manager.
type NodeEventHandler struct {
	// manager receives AddTarget / RemoveTarget calls.
	manager *Manager
	// k8sClient, queries and cache are handed to every NodeScraper
	// created in OnAdd.
	k8sClient k8s.ClientInterface
	queries   *queries.Queries
	// interval is passed to manager.AddTarget along with the scrape func.
	interval time.Duration
	cache    PodCache
}
// NewNodeEventHandler returns a NodeEventHandler holding the given
// dependencies; no other work is performed.
func NewNodeEventHandler(
	manager *Manager,
	k8sClient k8s.ClientInterface,
	queries *queries.Queries,
	interval time.Duration,
	cache PodCache,
) *NodeEventHandler {
	handler := new(NodeEventHandler)
	handler.manager = manager
	handler.k8sClient = k8sClient
	handler.queries = queries
	handler.interval = interval
	handler.cache = cache
	return handler
}
// OnAdd registers a scrape target for a newly observed node.
//
// obj must be a *v1.Node with a non-empty name; anything else is logged
// and ignored. The target is registered under the ID "node/<name>" with
// the handler's interval. isInInitialList is not used.
func (h *NodeEventHandler) OnAdd(obj interface{}, isInInitialList bool) {
	node, isNode := obj.(*v1.Node)
	switch {
	case !isNode:
		slog.Error("adding node", "error", fmt.Errorf("expected *v1.Node, got %T", obj))
		return
	case node.Name == "":
		slog.Error("node name is empty")
		return
	}

	scraper := NewNodeScrapper(node.Name, h.k8sClient, h.queries, h.cache)
	h.manager.AddTarget("node/"+node.Name, scraper.Scrape, h.interval)
}
// OnUpdate ignores node update events; both arguments are unused.
func (h *NodeEventHandler) OnUpdate(oldObj, obj interface{}) {
	// No-op.
}
// OnDelete removes the scrape target of a deleted node.
//
// obj must be a *v1.Node with a non-empty name; anything else is logged
// and ignored. The target ID removed is "node/<name>", the same ID that
// OnAdd registers.
//
// NOTE(review): if this handler is attached to a client-go informer,
// deletions may presumably arrive wrapped in a tombstone
// (cache.DeletedFinalStateUnknown), which the type assertion below rejects —
// confirm whether that case needs unwrapping.
func (h *NodeEventHandler) OnDelete(obj interface{}) {
	node, isNode := obj.(*v1.Node)
	switch {
	case !isNode:
		slog.Error("removing node", "error", fmt.Errorf("expected *v1.Node, got %T", obj))
		return
	case node.Name == "":
		slog.Error("node name is empty")
		return
	}

	h.manager.RemoveTarget("node/" + node.Name)
}