Skip to content

Commit

Permalink
add metric for ovn nb/sb db status
Browse files Browse the repository at this point in the history
  • Loading branch information
hongzhen-ma committed Jan 27, 2022
1 parent 92e7b97 commit 4c4390b
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 1 deletion.
8 changes: 7 additions & 1 deletion dist/images/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2417,7 +2417,7 @@ OVN_SB_POD=
showHelp(){
echo "kubectl ko {subcommand} [option...]"
echo "Available Subcommands:"
echo " [nb|sb] [status|kick|backup] ovn-db operations show cluster status, kick stale server or backup database"
echo " [nb|sb] [status|kick|backup|dbstatus] ovn-db operations show cluster status, kick stale server, backup database or get db consistency status"
echo " nbctl [ovn-nbctl options ...] invoke ovn-nbctl"
echo " sbctl [ovn-sbctl options ...] invoke ovn-sbctl"
echo " vsctl {nodeName} [ovs-vsctl options ...] invoke ovs-vsctl on the specified node"
Expand Down Expand Up @@ -2837,6 +2837,9 @@ dbtool(){
kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -c ovn-central -- rm -f /etc/ovn/ovnnb_db.$suffix.backup
echo "backup $component to $(pwd)/ovnnb_db.$suffix.backup"
;;
dbstatus)
# Run ovsdb-server/get-db-storage-status for OVN_Northbound through the NB
# control socket, inside the ovn-central container of $OVN_NB_POD.
kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovn-appctl -t /var/run/ovn/ovnnb_db.ctl ovsdb-server/get-db-storage-status OVN_Northbound
;;
*)
echo "unknown action $action"
esac
Expand All @@ -2856,6 +2859,9 @@ dbtool(){
kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -c ovn-central -- rm -f /etc/ovn/ovnsb_db.$suffix.backup
echo "backup $component to $(pwd)/ovnsb_db.$suffix.backup"
;;
dbstatus)
  # Run ovsdb-server/get-db-storage-status for OVN_Southbound through the SB
  # control socket. Target $OVN_SB_POD, like the other southbound actions in
  # this case statement, rather than the northbound pod.
  kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovn-appctl -t /var/run/ovn/ovnsb_db.ctl ovsdb-server/get-db-storage-status OVN_Southbound
  ;;
*)
echo "unknown action $action"
esac
Expand Down
13 changes: 13 additions & 0 deletions dist/images/ovn-is-leader.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,19 @@ else
kubectl label pod "$POD_NAME" -n "$POD_NAMESPACE" ovn-sb-leader-
fi

# Ask the northbound ovsdb-server for its storage status and echo it.
# Exit with status 1 when the reported status contains "inconsistent".
nb_status=$(ovn-appctl -t /var/run/ovn/ovnnb_db.ctl ovsdb-server/get-db-storage-status OVN_Northbound)
echo "nb $nb_status"
if [[ $nb_status =~ "inconsistent" ]]
then
exit 1
fi
# Same check for the southbound database.
sb_status=$(ovn-appctl -t /var/run/ovn/ovnsb_db.ctl ovsdb-server/get-db-storage-status OVN_Southbound)
echo "sb $sb_status"
if [[ $sb_status =~ "inconsistent" ]]
then
exit 1
fi

set +e
ovn-appctl -t /var/run/ovn/ovnnb_db.ctl ovsdb-server/compact
ovn-appctl -t /var/run/ovn/ovnsb_db.ctl ovsdb-server/compact
Expand Down
17 changes: 17 additions & 0 deletions pkg/ovnmonitor/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ func (e *Exporter) ovnMetricsUpdate() {
e.exportOvnLogFileSizeGauge()
e.exportOvnDBFileSizeGauge()
e.exportOvnRequestErrorGauge()
e.exportOvnDBStatusGauge()

e.exportOvnChassisGauge()
e.exportLogicalSwitchGauge()
Expand Down Expand Up @@ -253,3 +254,19 @@ func (e *Exporter) exportOvnClusterInfoGauge() {
e.setOvnClusterInfoMetric(clusterStatus, database)
}
}

// exportOvnDBStatusGauge publishes the storage status of the OVN northbound
// and southbound databases through metricDBStatus, labelled with the exporter
// hostname and the database name: 1 when getDBStatus reports the database as
// ok, 0 otherwise.
//
// When the status of one database cannot be obtained the failure is logged
// and that database's gauge is left unchanged; the remaining databases are
// still processed.
func (e *Exporter) exportOvnDBStatusGauge() {
	dbList := []string{"OVN_Northbound", "OVN_Southbound"}
	for _, database := range dbList {
		ok, err := getDBStatus(database)
		if err != nil {
			klog.Errorf("Failed to get DB status for %s: %v", database, err)
			// Keep going: a failure on one database must not prevent the
			// other one from being reported.
			continue
		}

		status := 0.0
		if ok {
			status = 1
		}
		metricDBStatus.WithLabelValues(e.Client.System.Hostname, database).Set(status)
	}
}
12 changes: 12 additions & 0 deletions pkg/ovnmonitor/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,17 @@ var (
"server_id",
"cluster_id",
})

// metricDBStatus is a gauge vector reporting the status of an OVN database,
// with one series per (hostname, db_name) label pair. Per its help text the
// value is 1 for healthy and 0 for unhealthy.
metricDBStatus = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metricNamespace,
Name: "db_status",
Help: "The status of OVN NB/SB DB, (1) for healthy, (0) for unhealthy.",
},
[]string{
"hostname",
"db_name",
})
)

func registerOvnMetrics() {
Expand All @@ -457,6 +468,7 @@ func registerOvnMetrics() {
prometheus.MustRegister(metricRequestErrorNums)
prometheus.MustRegister(metricLogFileSize)
prometheus.MustRegister(metricDBFileSize)
prometheus.MustRegister(metricDBStatus)

// ovn chassis metrics
prometheus.MustRegister(metricChassisInfo)
Expand Down
31 changes: 31 additions & 0 deletions pkg/ovnmonitor/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,3 +249,34 @@ func parseDbStatus(output string) int {
}
return result
}

// getDBStatus reports whether the storage of the given OVN database is
// healthy, based on the output of
// `ovn-appctl -t <ctl socket> ovsdb-server/get-db-storage-status <dbName>`.
//
// dbName must be "OVN_Northbound" or "OVN_Southbound"; any other value yields
// an error rather than running an empty command.
//
// The result is true when a line containing "status: ok" is seen before any
// line containing "ovsdb error", and false otherwise (including when neither
// marker is present). A non-nil error means the command could not be run or
// exited unsuccessfully; the boolean is always false in that case. The error
// is returned, not logged, so that the caller reports it exactly once.
func getDBStatus(dbName string) (bool, error) {
	var ctlPath string
	switch dbName {
	case "OVN_Northbound":
		ctlPath = "/var/run/ovn/ovnnb_db.ctl"
	case "OVN_Southbound":
		ctlPath = "/var/run/ovn/ovnsb_db.ctl"
	default:
		return false, fmt.Errorf("unknown OVN database %q", dbName)
	}

	// Invoke ovn-appctl directly with an argument list; no shell is needed.
	cmd := exec.Command("ovn-appctl", "-t", ctlPath, "ovsdb-server/get-db-storage-status", dbName)
	output, err := cmd.CombinedOutput()
	if err != nil {
		return false, fmt.Errorf("get %s storage status: %w (output: %q)",
			dbName, err, strings.TrimSpace(string(output)))
	}

	// The first marker encountered decides the result.
	for _, line := range strings.Split(string(output), "\n") {
		if strings.Contains(line, "status: ok") {
			return true, nil
		}
		if strings.Contains(line, "ovsdb error") {
			return false, nil
		}
	}

	return false, nil
}

0 comments on commit 4c4390b

Please sign in to comment.