-
Notifications
You must be signed in to change notification settings - Fork 332
/
ovn.go
497 lines (464 loc) · 15.7 KB
/
ovn.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
package metrics
import (
"fmt"
"strings"
"time"
"k8s.io/klog/v2"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
"github.com/prometheus/client_golang/prometheus"
)
// ovn-controller configuration and connection metrics.
//
// Every metric here except metricOVNControllerSBDBConnection is refreshed by
// setOvnControllerConfigurationMetrics from the Open_vSwitch table's
// external_ids column; metricOVNControllerSBDBConnection is maintained by
// updateSBDBConnectionMetric.
var (
	// metricRemoteProbeInterval is fed from external_ids:ovn-remote-probe-interval,
	// divided by 1000 before being stored.
	metricRemoteProbeInterval = prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: MetricOvnNamespace,
		Subsystem: MetricOvnSubsystemController,
		Name:      "remote_probe_interval_seconds",
		Help:      "The inactivity probe interval of the connection to the OVN SB DB.",
	})

	// metricOpenFlowProbeInterval is fed from
	// external_ids:ovn-openflow-probe-interval, stored as-is.
	metricOpenFlowProbeInterval = prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: MetricOvnNamespace,
		Subsystem: MetricOvnSubsystemController,
		Name:      "openflow_probe_interval_seconds",
		Help: "The inactivity probe interval of the OpenFlow connection to the " +
			"OpenvSwitch integration bridge.",
	})

	// metricMonitorAll is 1 when external_ids:ovn-monitor-all is "true" and 0
	// for any other value.
	metricMonitorAll = prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: MetricOvnNamespace,
		Subsystem: MetricOvnSubsystemController,
		Name:      "monitor_all",
		Help: "Specifies if ovn-controller should monitor all records of tables in OVN SB DB. " +
			"If set to false, it will conditionally monitor the records that " +
			"is needed in the current chassis. Values are false(0), true(1).",
	})

	// metricEncapIP carries external_ids:ovn-encap-ip in its "ipaddress" label.
	metricEncapIP = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Namespace: MetricOvnNamespace,
		Subsystem: MetricOvnSubsystemController,
		Name:      "encap_ip",
		Help: "A metric with a constant '1' value labeled by ipaddress that " +
			"specifies the encapsulation ip address configured on that node.",
	},
		[]string{
			"ipaddress",
		},
	)

	// metricSbConnectionMethod carries external_ids:ovn-remote in its
	// "connection_method" label.
	metricSbConnectionMethod = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Namespace: MetricOvnNamespace,
		Subsystem: MetricOvnSubsystemController,
		Name:      "sb_connection_method",
		Help: "A metric with a constant '1' value labeled by connection_method that " +
			"specifies the ovn-remote value configured on that node.",
	},
		[]string{
			"connection_method",
		},
	)

	// metricEncapType carries external_ids:ovn-encap-type in its "type" label.
	metricEncapType = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Namespace: MetricOvnNamespace,
		Subsystem: MetricOvnSubsystemController,
		Name:      "encap_type",
		Help: "A metric with a constant '1' value labeled by type that " +
			"specifies the encapsulation type a chassis should use to " +
			"connect to this node.",
	},
		[]string{
			"type",
		},
	)

	// metricBridgeMappings carries external_ids:ovn-bridge-mappings in its
	// "mapping" label.
	metricBridgeMappings = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Namespace: MetricOvnNamespace,
		Subsystem: MetricOvnSubsystemController,
		Name:      "bridge_mappings",
		Help: "A metric with a constant '1' value labeled by mapping that " +
			"specifies list of key-value pairs that map a physical network name " +
			"to a local ovs bridge that provides connectivity to that network.",
	},
		[]string{
			"mapping",
		},
	)

	// metricOVNControllerSBDBConnection is set to 1 when the
	// "connection-status" query reports "connected" and to 0 otherwise.
	metricOVNControllerSBDBConnection = prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: MetricOvnNamespace,
		Subsystem: MetricOvnSubsystemController,
		Name:      "southbound_database_connected",
		Help:      "Specifies if OVN controller is connected to OVN southbound database (1) or not (0)",
	})
)
// ovnControllerVersion and ovnControllerOvsLibVersion hold the version tokens
// that getOvnControllerVersionInfo extracts from the "version" command output.
// Both stay empty until that function has run successfully.
var ovnControllerVersion, ovnControllerOvsLibVersion string
// getOvnControllerVersionInfo runs the ovn-controller "version" app-ctl
// command and stores the reported versions in ovnControllerVersion and
// ovnControllerOvsLibVersion.
//
// If the command fails the function returns silently and both variables keep
// their previous values. Lines that match a known prefix but do not carry
// the expected version token are ignored instead of causing an
// index-out-of-range panic.
func getOvnControllerVersionInfo() {
	stdout, _, err := util.RunOVNControllerAppCtl("version")
	if err != nil {
		return
	}

	// the output looks something like this:
	// ovn-controller 20.06.0.86f64fc1
	// Open vSwitch Library 2.13.0.f945b5c5
	for _, line := range strings.Split(stdout, "\n") {
		fields := strings.Fields(line)
		switch {
		case strings.HasPrefix(line, "ovn-controller "):
			// fields: ["ovn-controller", "<version>"]
			if len(fields) > 1 {
				ovnControllerVersion = fields[1]
			}
		case strings.HasPrefix(line, "Open vSwitch Library "):
			// fields: ["Open", "vSwitch", "Library", "<version>"]
			if len(fields) > 3 {
				ovnControllerOvsLibVersion = fields[3]
			}
		}
	}
}
// ovnControllerCoverageShowMetricsMap describes the ovn-controller metrics
// that are registered through registerCoverageShowMetrics. Each key is used
// as the map entry for one metric and each value carries its help text.
// NOTE(review): metricDetails is declared elsewhere; srcName presumably names
// the coverage counter to read when it differs from the key, and
// aggregateFrom presumably lists counters that are summed into one metric -
// confirm against the metricDetails definition.
var ovnControllerCoverageShowMetricsMap = map[string]*metricDetails{
// Logical flow translation.
"lflow_run": {
help: "Number of times ovn-controller has translated " +
"the Logical_Flow table in the OVN " +
"SB database into OpenFlow flows.",
},
// rconn_*: connections to OpenFlow devices.
"rconn_sent": {
help: "Specifies the number of messages " +
"that have been sent to the underlying virtual " +
"connection (unix, tcp, or ssl) to OpenFlow devices.",
},
"rconn_queued": {
help: "Specifies the number of messages that have been " +
"queued because it couldn’t be sent using the " +
"underlying virtual connection to OpenFlow devices.",
},
"rconn_discarded": {
help: "Specifies the number of messages that " +
"have been dropped because the send queue " +
"had to be flushed because of reconnection.",
},
"rconn_overflow": {
help: "Specifies the number of messages that have " +
"been dropped because of the queue overflow.",
},
// vconn_*: virtual connections to the OpenFlow device.
"vconn_open": {
help: "Specifies the number of attempts to connect " +
"to an OpenFlow Device.",
},
"vconn_sent": {
help: "Specifies the number of messages sent " +
"to the OpenFlow Device.",
},
"vconn_received": {
help: "Specifies the number of messages received " +
"from the OpenFlow Device.",
},
"stream_open": {
help: "Specifies the number of attempts to connect " +
"to a remote peer (active connection).",
},
// txn_*: OVSDB transaction outcomes.
"txn_success": {
help: "Specifies the number of times the OVSDB " +
"transaction has successfully completed.",
},
"txn_error": {
help: "Specifies the number of times the OVSDB " +
"transaction has errored out.",
},
"txn_uncommitted": {
help: "Specifies the number of times the OVSDB " +
"transaction were uncommitted.",
},
"txn_unchanged": {
help: "Specifies the number of times the OVSDB transaction " +
"resulted in no change to the database.",
},
"txn_incomplete": {
help: "Specifies the number of times the OVSDB transaction " +
"did not complete and the client had to re-try.",
},
"txn_aborted": {
// NOTE(review): the second literal starts with a space, so the rendered
// help text contains a double space after "OVSDB".
help: "Specifies the number of times the OVSDB " +
" transaction has been aborted.",
},
"txn_try_again": {
help: "Specifies the number of times the OVSDB " +
"transaction failed and the client had to re-try.",
},
// netlink_*: netlink traffic with the kernel.
"netlink_sent": {
help: "Number of netlink message sent to the kernel.",
},
"netlink_received": {
// NOTE(review): the wording "received by the kernel" looks inverted for a
// counter named netlink_received - confirm the intended direction.
help: "Number of netlink messages received by the kernel.",
},
"netlink_recv_jumbo": {
help: "Number of netlink messages that were received from " +
"the kernel were more than the allocated buffer.",
},
"netlink_overflow": {
help: "Netlink messages dropped by the daemon due " +
"to buffer overflow.",
},
// packet_in is the only entry that sets srcName ("flow_extract").
"packet_in": {
srcName: "flow_extract",
help: "Specifies the number of times ovn-controller has " +
"handled the packet-ins from ovs-vswitchd.",
},
// packet_in_drop is the only entry that sets aggregateFrom; it lists four
// pinctrl_drop_* names.
"packet_in_drop": {
aggregateFrom: []string{
"pinctrl_drop_put_mac_binding",
"pinctrl_drop_buffered_packets_map",
"pinctrl_drop_controller_event",
"pinctrl_drop_put_vport_binding",
},
help: "Specifies the number of times the ovn-controller has dropped the " +
"packet-ins from ovs-vswitchd due to resource constraints",
},
}
// ovnControllerStopwatchShowMetricsMap lists the ovn-controller stopwatch
// metrics handed to registerStopwatchShowMetrics. Keys use underscores and
// each srcName is the same name spelled with hyphens.
// NOTE(review): srcName is presumably the stopwatch name as printed by
// ovn-controller - confirm against the stopwatchMetricDetails consumers.
// Entries are kept in alphabetical order.
var ovnControllerStopwatchShowMetricsMap = map[string]*stopwatchMetricDetails{
	"bfd_run":              {srcName: "bfd-run"},
	"ct_zone_commit":       {srcName: "ct-zone-commit"},
	"flow_generation":      {srcName: "flow-generation"},
	"flow_installation":    {srcName: "flow-installation"},
	"if_status_mgr_run":    {srcName: "if-status-mgr-run"},
	"if_status_mgr_update": {srcName: "if-status-mgr-update"},
	"ofctrl_seqno_run":     {srcName: "ofctrl-seqno-run"},
	"patch_run":            {srcName: "patch-run"},
	"pinctrl_run":          {srcName: "pinctrl-run"},
}
// setOvnControllerConfigurationMetrics updates ovn-controller configuration
// values (ovn-openflow-probe-interval, ovn-remote-probe-interval, ovn-monitor-all,
// ovn-encap-ip, ovn-encap-type, ovn-remote, ovn-bridge-mappings,
// ovn-k8s-node-port) through
// "ovs-vsctl --no-headings --data=bare --columns=external_ids list Open_vSwitch ."
//
// The output is treated as whitespace-separated key=value tokens. Tokens
// without "=" and keys that are not listed above are ignored. Each token is
// split at the first "=" only, so a value that itself contains "=" is kept
// intact rather than being dropped.
//
// It returns an error when the ovs-vsctl command fails (the command error is
// wrapped, so errors.Is/errors.As work on the result) or when a panic raised
// while processing the output has been recovered.
func setOvnControllerConfigurationMetrics() (err error) {
	var stdout, stderr string

	// Convert any panic raised below into the returned error.
	defer func() {
		if r := recover(); r != nil {
			err = fmt.Errorf("recovering from panic while parsing the "+
				"Open_vSwitch table's external_ids column - %v", r)
		}
	}()

	stdout, stderr, err = util.RunOVSVsctl("--no-headings", "--data=bare",
		"--columns=external_ids", "list", "Open_vSwitch", ".")
	if err != nil {
		return fmt.Errorf("failed to get Open_vSwitch table's external_ids column "+
			"stderr(%s) : %w", stderr, err)
	}

	// The node-port metric is 1 unless ovn-k8s-node-port is explicitly "false".
	var ovnNodePortValue = 1
	for _, kvPair := range strings.Fields(stdout) {
		elem := strings.SplitN(kvPair, "=", 2)
		if len(elem) != 2 {
			continue
		}
		fieldType := elem[0]
		fieldValue := elem[1]
		switch fieldType {
		case "ovn-openflow-probe-interval":
			metricValue := parseMetricToFloat(MetricOvnSubsystemController, "ovn-openflow-probe-interval", fieldValue)
			metricOpenFlowProbeInterval.Set(metricValue)
		case "ovn-remote-probe-interval":
			metricValue := parseMetricToFloat(MetricOvnSubsystemController, "ovn-remote-probe-interval", fieldValue)
			// The gauge is named ..._seconds; the configured value is
			// presumably in milliseconds, hence the division.
			metricRemoteProbeInterval.Set(metricValue / 1000)
		case "ovn-monitor-all":
			var ovnMonitorValue float64
			if fieldValue == "true" {
				ovnMonitorValue = 1
			}
			metricMonitorAll.Set(ovnMonitorValue)
		case "ovn-encap-ip":
			// To update not only values but also labels for metrics, we use Reset() to delete previous labels+value
			metricEncapIP.Reset()
			metricEncapIP.WithLabelValues(fieldValue).Set(1)
		case "ovn-remote":
			metricSbConnectionMethod.Reset()
			metricSbConnectionMethod.WithLabelValues(fieldValue).Set(1)
		case "ovn-encap-type":
			metricEncapType.Reset()
			metricEncapType.WithLabelValues(fieldValue).Set(1)
		case "ovn-k8s-node-port":
			if fieldValue == "false" {
				ovnNodePortValue = 0
			}
		case "ovn-bridge-mappings":
			metricBridgeMappings.Reset()
			metricBridgeMappings.WithLabelValues(fieldValue).Set(1)
		}
	}
	metricOvnNodePortEnabled.Set(float64(ovnNodePortValue))
	return nil
}
// ovnControllerConfigurationMetricsUpdater refreshes the ovn-controller
// configuration metrics every 30 seconds by calling
// setOvnControllerConfigurationMetrics, logging any error it returns. It
// blocks until stopChan is closed or receives a value.
func ovnControllerConfigurationMetricsUpdater(stopChan <-chan struct{}) {
	const refreshInterval = 30 * time.Second

	refresh := time.NewTicker(refreshInterval)
	defer refresh.Stop()

	for {
		select {
		case <-stopChan:
			return
		case <-refresh.C:
			err := setOvnControllerConfigurationMetrics()
			if err == nil {
				continue
			}
			klog.Errorf("Setting ovn controller config metrics failed: %s", err.Error())
		}
	}
}
// getPortCount returns the number of OVS interfaces of the given type, as
// listed by "ovs-vsctl find interface type=<portType>".
//
// For portType "patch" only interfaces whose name contains "br-int" are
// counted; for every other type all listed interfaces are counted. Blank
// lines are never counted, so an empty command output yields 0 (splitting an
// empty string on "\n" produces one empty element, which must not be treated
// as a port). When the command fails the error is logged and 0 is returned.
func getPortCount(portType string) float64 {
	stdout, stderr, err := util.RunOVSVsctl("--no-headings", "--data=bare", "--format=csv",
		"--columns=name", "find", "interface", "type="+portType)
	if err != nil {
		klog.Errorf("Failed to get %s interface count, stderr(%s): (%v)", portType, stderr, err)
		return 0
	}

	var portCount float64
	for _, portName := range strings.Split(stdout, "\n") {
		portName = strings.TrimSpace(portName)
		if portName == "" {
			continue
		}
		if portType == "patch" && !strings.Contains(portName, "br-int") {
			continue
		}
		portCount++
	}
	return portCount
}
// ovnControllerSBDBConnectionCheckUpdater refreshes the southbound database
// connection metric once immediately and then once per period, blocking
// until stopCh is closed or receives a value.
//
// Each refresh is allowed up to 5 attempts with a 5 second pause, so that a
// transient connection problem is less likely to be reported as the state
// for a whole poll interval. The function panics when period is not strictly
// greater than the total retry time (25 seconds).
func ovnControllerSBDBConnectionCheckUpdater(stopCh <-chan struct{}, ovsAppctl ovsClient, period time.Duration) {
	const (
		attempts = 5
		pause    = 5 * time.Second
	)
	if pause*time.Duration(attempts) >= period {
		panic("period must be greater than retry total time")
	}

	// Give the metric a meaningful value before the first tick arrives.
	updateSBDBConnectionMetric(ovsAppctl, attempts, pause)

	poll := time.NewTicker(period)
	defer poll.Stop()
	for {
		select {
		case <-stopCh:
			return
		case <-poll.C:
			updateSBDBConnectionMetric(ovsAppctl, attempts, pause)
		}
	}
}
// updateSBDBConnectionMetric queries "connection-status" through ovsAppctl
// and sets metricOVNControllerSBDBConnection to 1 when the output starts
// with "connected", or to 0 when no attempt succeeded.
//
// Up to retry attempts are made. retrySleep is waited before every attempt
// after the first, whatever made the previous attempt fail (command error,
// output on stderr, blank output or a non-connected status), so a transient
// failure cannot exhaust all attempts instantly. No sleep follows the final
// attempt. With retry <= 0 no query is made and the metric is set to 0.
func updateSBDBConnectionMetric(ovsAppctl ovsClient, retry int, retrySleep time.Duration) {
	connected := false
	for i := 0; i < retry && !connected; i++ {
		if i > 0 {
			// back off before retrying
			time.Sleep(retrySleep)
		}
		stdOut, stdErr, err := ovsAppctl("connection-status")
		switch {
		case err != nil:
			klog.Errorf("Failed to get OVN controller southbound database connection status before utilizing "+
				"client ovs-appctl: %v", err)
		case stdErr != "":
			klog.Errorf("Failed to get OVN controller southbound database connection status because "+
				"ovs-appctl command returned an error: %s", stdErr)
		case stdOut == "":
			klog.Errorf("Unexpected blank output while attempting to retrieve OVN controller southbound " +
				"database connection status")
		case strings.HasPrefix(stdOut, "connected"):
			connected = true
		}
	}

	if connected {
		metricOVNControllerSBDBConnection.Set(1)
	} else {
		metricOVNControllerSBDBConnection.Set(0)
	}
}
// RegisterOvnControllerMetrics registers all ovn-controller metrics with
// ovnRegistry and starts the goroutines that keep them up to date. The
// goroutines are handed stopChan so that they can be stopped by the caller.
func RegisterOvnControllerMetrics(stopChan <-chan struct{}) {
	// The version strings must be resolved first: they become the constant
	// labels of the build_info metric.
	getOvnControllerVersionInfo()

	buildInfo := prometheus.NewGaugeFunc(
		prometheus.GaugeOpts{
			Namespace: MetricOvnNamespace,
			Subsystem: MetricOvnSubsystemController,
			Name:      "build_info",
			Help: "A metric with a constant '1' value labeled by version and library " +
				"from which ovn binaries were built",
			ConstLabels: prometheus.Labels{
				"version":         ovnControllerVersion,
				"ovs_lib_version": ovnControllerOvsLibVersion,
			},
		},
		func() float64 { return 1 },
	)
	ovnRegistry.MustRegister(buildInfo)

	// ovn-controller metrics
	ovnRegistry.MustRegister(metricOVNControllerSBDBConnection)

	// brIntFlowCount extracts the flow_count value from
	// "ovs-ofctl -t 5 dump-aggregate br-int"; it yields 0 when the command
	// fails or when no flow_count token is present.
	brIntFlowCount := func() float64 {
		stdout, stderr, err := util.RunOVSOfctl("-t", "5", "dump-aggregate", "br-int")
		if err != nil {
			klog.Errorf("Failed to get flow count for br-int, stderr(%s): (%v)",
				stderr, err)
			return 0
		}
		for _, token := range strings.Fields(stdout) {
			if !strings.HasPrefix(token, "flow_count=") {
				continue
			}
			parts := strings.Split(token, "=")
			return parseMetricToFloat(MetricOvnSubsystemController, "integration_bridge_openflow_total",
				parts[1])
		}
		return 0
	}
	// NOTE(review): this value is exposed as a counter although a flow count
	// can presumably decrease - confirm whether a gauge was intended.
	ovnRegistry.MustRegister(prometheus.NewCounterFunc(
		prometheus.CounterOpts{
			Namespace: MetricOvnNamespace,
			Subsystem: MetricOvnSubsystemController,
			Name:      "integration_bridge_openflow_total",
			Help:      "The total number of OpenFlow flows in the integration bridge.",
		},
		brIntFlowCount,
	))

	patchPorts := func() float64 { return getPortCount("patch") }
	ovnRegistry.MustRegister(prometheus.NewGaugeFunc(
		prometheus.GaugeOpts{
			Namespace: MetricOvnNamespace,
			Subsystem: MetricOvnSubsystemController,
			Name:      "integration_bridge_patch_ports",
			Help: "Captures the number of patch ports that connect br-int OVS " +
				"bridge to physical OVS bridge and br-local OVS bridge.",
		},
		patchPorts,
	))

	genevePorts := func() float64 { return getPortCount("geneve") }
	ovnRegistry.MustRegister(prometheus.NewGaugeFunc(
		prometheus.GaugeOpts{
			Namespace: MetricOvnNamespace,
			Subsystem: MetricOvnSubsystemController,
			Name:      "integration_bridge_geneve_ports",
			Help:      "Captures the number of geneve ports that are on br-int OVS bridge.",
		},
		genevePorts,
	))

	// register ovn-controller configuration metrics
	ovnRegistry.MustRegister(metricRemoteProbeInterval)
	ovnRegistry.MustRegister(metricOpenFlowProbeInterval)
	ovnRegistry.MustRegister(metricMonitorAll)
	ovnRegistry.MustRegister(metricEncapIP)
	ovnRegistry.MustRegister(metricSbConnectionMethod)
	ovnRegistry.MustRegister(metricEncapType)
	ovnRegistry.MustRegister(metricBridgeMappings)

	// Register the ovn-controller coverage/show metrics
	componentCoverageShowMetricsMap[ovnController] = ovnControllerCoverageShowMetricsMap
	registerCoverageShowMetrics(ovnController, MetricOvnNamespace, MetricOvnSubsystemController)

	// Register the ovn-controller stopwatch/show metrics
	componentStopwatchShowMetricsMap[ovnController] = ovnControllerStopwatchShowMetricsMap
	registerStopwatchShowMetrics(ovnController, MetricOvnNamespace, MetricOvnSubsystemController)

	// Background updaters: configuration, coverage/show, stopwatch/show and
	// southbound database connection status (polled every two minutes).
	go ovnControllerConfigurationMetricsUpdater(stopChan)
	go coverageShowMetricsUpdater(ovnController, stopChan)
	go stopwatchShowMetricsUpdater(ovnController, stopChan)
	go ovnControllerSBDBConnectionCheckUpdater(stopChan, util.RunOVNControllerAppCtl, time.Minute*2)
}