Skip to content

Commit

Permalink
add restore process for ovn nb db
Browse files Browse the repository at this point in the history
  • Loading branch information
hongzhen-ma committed Mar 9, 2022
1 parent e9a4bd5 commit 44dae1f
Show file tree
Hide file tree
Showing 9 changed files with 199 additions and 11 deletions.
57 changes: 54 additions & 3 deletions dist/images/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -993,6 +993,7 @@ rules:
- statefulsets
- daemonsets
- deployments
- deployments/scale
verbs:
- create
- delete
Expand Down Expand Up @@ -1466,6 +1467,7 @@ rules:
- statefulsets
- daemonsets
- deployments
- deployments/scale
verbs:
- create
- delete
Expand Down Expand Up @@ -2501,7 +2503,7 @@ REGISTRY="kubeovn"
showHelp(){
echo "kubectl ko {subcommand} [option...]"
echo "Available Subcommands:"
echo " [nb|sb] [status|kick|backup|dbstatus] ovn-db operations show cluster status, kick stale server, backup database or get db consistency status"
echo " [nb|sb] [status|kick|backup|dbstatus|restore] ovn-db operations show cluster status, kick stale server, backup database, get db consistency status or restore ovn nb db when met 'inconsistent data' error"
echo " nbctl [ovn-nbctl options ...] invoke ovn-nbctl"
echo " sbctl [ovn-sbctl options ...] invoke ovn-sbctl"
echo " vsctl {nodeName} [ovs-vsctl options ...] invoke ovs-vsctl on the specified node"
Expand Down Expand Up @@ -2930,11 +2932,57 @@ dbtool(){
kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovsdb-tool cluster-to-standalone /etc/ovn/ovnnb_db.$suffix.backup /etc/ovn/ovnnb_db.db
kubectl cp $KUBE_OVN_NS/$OVN_NB_POD:/etc/ovn/ovnnb_db.$suffix.backup $(pwd)/ovnnb_db.$suffix.backup
kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -c ovn-central -- rm -f /etc/ovn/ovnnb_db.$suffix.backup
echo "backup $component to $(pwd)/ovnnb_db.$suffix.backup"
echo "backup ovn-$component db to $(pwd)/ovnnb_db.$suffix.backup"
;;
dbstatus)
kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovn-appctl -t /var/run/ovn/ovnnb_db.ctl ovsdb-server/get-db-storage-status OVN_Northbound
;;
restore)
# set ovn-central replicas to 0
replicas=$(kubectl get deployment -n $KUBE_OVN_NS ovn-central -o jsonpath={.spec.replicas})
kubectl scale deployment -n $KUBE_OVN_NS ovn-central --replicas=0
echo "ovn-central original replicas is $replicas"
# backup ovn-nb db
declare nodeIpArray
declare podNameArray
nodeIps=`kubectl get node -lkube-ovn/role=master -o wide | grep -v "INTERNAL-IP" | awk '{print $6}'`
firstIP=${nodeIps[0]}
podNames=`kubectl get pod -n $KUBE_OVN_NS | grep ovs-ovn | awk '{print $1}'`
echo "first nodeIP is $firstIP"
i=0
for nodeIp in $nodeIps
do
for pod in $podNames
do
hostip=$(kubectl get pod -n $KUBE_OVN_NS $pod -o jsonpath={.status.hostIP})
if [ $nodeIp = $hostip ]; then
nodeIpArray[$i]=$nodeIp
podNameArray[$i]=$pod
i=`expr $i + 1`
echo "ovs-ovn pod on node $nodeIp is $pod"
break
fi
done
done
echo "backup nb db file"
docker run -it -v /etc/origin/ovn:/etc/ovn $REGISTRY/kube-ovn:$VERSION bash -c "ovsdb-tool cluster-to-standalone /etc/ovn/ovnnb_db_standalone.db /etc/ovn/ovnnb_db.db"
# mv all db files
for pod in ${podNameArray[@]}
do
kubectl exec -it -n $KUBE_OVN_NS $pod -- mv /etc/ovn/ovnnb_db.db /tmp
kubectl exec -it -n $KUBE_OVN_NS $pod -- mv /etc/ovn/ovnsb_db.db /tmp
done
# restore db and replicas
echo "restore nb db file, operate in pod ${podNameArray[0]}"
kubectl exec -it -n $KUBE_OVN_NS ${podNameArray[0]} -- mv /etc/ovn/ovnnb_db_standalone.db /etc/ovn/ovnnb_db.db
kubectl scale deployment -n $KUBE_OVN_NS ovn-central --replicas=$replicas
echo "finish restore nb db file and ovn-central replicas"
;;
*)
echo "unknown action $action"
esac
Expand All @@ -2952,11 +3000,14 @@ dbtool(){
kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovsdb-tool cluster-to-standalone /etc/ovn/ovnsb_db.$suffix.backup /etc/ovn/ovnsb_db.db
kubectl cp $KUBE_OVN_NS/$OVN_SB_POD:/etc/ovn/ovnsb_db.$suffix.backup $(pwd)/ovnsb_db.$suffix.backup
kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -c ovn-central -- rm -f /etc/ovn/ovnsb_db.$suffix.backup
echo "backup $component to $(pwd)/ovnsb_db.$suffix.backup"
echo "backup ovn-$component db to $(pwd)/ovnsb_db.$suffix.backup"
;;
dbstatus)
kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovn-appctl -t /var/run/ovn/ovnsb_db.ctl ovsdb-server/get-db-storage-status OVN_Southbound
;;
restore)
echo "restore cmd is only used for nb db"
;;
*)
echo "unknown action $action"
esac
Expand Down
71 changes: 63 additions & 8 deletions dist/images/kubectl-ko
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ REGISTRY="kubeovn"
showHelp(){
echo "kubectl ko {subcommand} [option...]"
echo "Available Subcommands:"
echo " [nb|sb] [status|kick|backup] ovn-db operations show cluster status, kick stale server or backup database"
echo " [nb|sb] [status|kick|backup|dbstatus|restore] ovn-db operations show cluster status, kick stale server, backup database, get db consistency status or restore ovn nb db when met 'inconsistent data' error"
echo " nbctl [ovn-nbctl options ...] invoke ovn-nbctl"
echo " sbctl [ovn-sbctl options ...] invoke ovn-sbctl"
echo " vsctl {nodeName} [ovs-vsctl options ...] invoke ovs-vsctl on the specified node"
Expand Down Expand Up @@ -436,11 +436,60 @@ dbtool(){
kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovs-appctl -t /var/run/ovn/ovnnb_db.ctl cluster/kick OVN_Northbound "$1"
;;
backup)
kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovsdb-tool cluster-to-standalone /tmp/ovnnb_db.$suffix.backup /etc/ovn/ovnnb_db.db
kubectl cp $KUBE_OVN_NS/$OVN_NB_POD:/tmp/ovnnb_db.$suffix.backup $(pwd)/ovnnb_db.$suffix.backup
kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -c ovn-central -- rm -f /tmp/ovnnb_db.$suffix.backup
kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovsdb-tool cluster-to-standalone /etc/ovn/ovnnb_db.$suffix.backup /etc/ovn/ovnnb_db.db
kubectl cp $KUBE_OVN_NS/$OVN_NB_POD:/etc/ovn/ovnnb_db.$suffix.backup $(pwd)/ovnnb_db.$suffix.backup
kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -c ovn-central -- rm -f /etc/ovn/ovnnb_db.$suffix.backup
echo "backup ovn-$component db to $(pwd)/ovnnb_db.$suffix.backup"
;;
dbstatus)
kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovn-appctl -t /var/run/ovn/ovnnb_db.ctl ovsdb-server/get-db-storage-status OVN_Northbound
;;
restore)
# set ovn-central replicas to 0
replicas=$(kubectl get deployment -n $KUBE_OVN_NS ovn-central -o jsonpath={.spec.replicas})
kubectl scale deployment -n $KUBE_OVN_NS ovn-central --replicas=0
echo "ovn-central original replicas is $replicas"

# backup ovn-nb db
declare nodeIpArray
declare podNameArray
nodeIps=`kubectl get node -lkube-ovn/role=master -o wide | grep -v "INTERNAL-IP" | awk '{print $6}'`
firstIP=${nodeIps[0]}
podNames=`kubectl get pod -n $KUBE_OVN_NS | grep ovs-ovn | awk '{print $1}'`
echo "first nodeIP is $firstIP"

i=0
for nodeIp in $nodeIps
do
for pod in $podNames
do
hostip=$(kubectl get pod -n $KUBE_OVN_NS $pod -o jsonpath={.status.hostIP})
if [ $nodeIp = $hostip ]; then
nodeIpArray[$i]=$nodeIp
podNameArray[$i]=$pod
i=`expr $i + 1`
echo "ovs-ovn pod on node $nodeIp is $pod"
break
fi
done
done

echo "backup nb db file"
docker run -it -v /etc/origin/ovn:/etc/ovn $REGISTRY/kube-ovn:$VERSION bash -c "ovsdb-tool cluster-to-standalone /etc/ovn/ovnnb_db_standalone.db /etc/ovn/ovnnb_db.db"

# mv all db files
for pod in ${podNameArray[@]}
do
kubectl exec -it -n $KUBE_OVN_NS $pod -- mv /etc/ovn/ovnnb_db.db /tmp
kubectl exec -it -n $KUBE_OVN_NS $pod -- mv /etc/ovn/ovnsb_db.db /tmp
done

# restore db and replicas
echo "restore nb db file, operate in pod ${podNameArray[0]}"
kubectl exec -it -n $KUBE_OVN_NS ${podNameArray[0]} -- mv /etc/ovn/ovnnb_db_standalone.db /etc/ovn/ovnnb_db.db
kubectl scale deployment -n $KUBE_OVN_NS ovn-central --replicas=$replicas
echo "finish restore nb db file and ovn-central replicas"
;;
*)
echo "unknown action $action"
esac
Expand All @@ -455,11 +504,17 @@ dbtool(){
kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovs-appctl -t /var/run/ovn/ovnsb_db.ctl cluster/kick OVN_Southbound "$1"
;;
backup)
kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovsdb-tool cluster-to-standalone /tmp/ovnsb_db.$suffix.backup /etc/ovn/ovnsb_db.db
kubectl cp $KUBE_OVN_NS/$OVN_SB_POD:/tmp/ovnsb_db.$suffix.backup $(pwd)/ovnsb_db.$suffix.backup
kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -c ovn-central -- rm -f /tmp/ovnsb_db.$suffix.backup
kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovsdb-tool cluster-to-standalone /etc/ovn/ovnsb_db.$suffix.backup /etc/ovn/ovnsb_db.db
kubectl cp $KUBE_OVN_NS/$OVN_SB_POD:/etc/ovn/ovnsb_db.$suffix.backup $(pwd)/ovnsb_db.$suffix.backup
kubectl exec "$OVN_SB_POD" -n $KUBE_OVN_NS -c ovn-central -- rm -f /etc/ovn/ovnsb_db.$suffix.backup
echo "backup ovn-$component db to $(pwd)/ovnsb_db.$suffix.backup"
;;
dbstatus)
kubectl exec "$OVN_NB_POD" -n $KUBE_OVN_NS -c ovn-central -- ovn-appctl -t /var/run/ovn/ovnsb_db.ctl ovsdb-server/get-db-storage-status OVN_Southbound
;;
restore)
echo "restore cmd is only used for nb db"
;;
*)
echo "unknown action $action"
esac
Expand Down Expand Up @@ -656,6 +711,6 @@ case $subcommand in
tuning "$@"
;;
*)
showHelp
showHelp
;;
esac
47 changes: 47 additions & 0 deletions dist/images/restore-ovn-nb-db.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash
# Restore the OVN northbound database from a standalone copy of the local
# clustered db file.
#
# Steps:
#   1. Record the ovn-central replica count and scale the deployment to 0.
#   2. Find the ovs-ovn pod running on each node labelled kube-ovn/role=master.
#   3. Convert the local clustered NB db into a standalone db file.
#   4. Move the NB and SB db files out of /etc/ovn in each of those pods.
#   5. Install the standalone file as the NB db and scale ovn-central back.
#
# Exits non-zero, before any db file has been moved, when the replica count
# cannot be read, ovn-central cannot be scaled down, or the conversion fails.
# NOTE(review): presumably /etc/ovn in this container and in the ovs-ovn pods
# is the same host directory - confirm against the pod specs.

KUBE_OVN_NS=kube-system
NB_DB=/etc/ovn/ovnnb_db.db
SB_DB=/etc/ovn/ovnsb_db.db
STANDALONE_DB=/etc/ovn/ovnnb_db_standalone.db

# Scale ovn-central back to the replica count recorded before the restore.
restore_replicas() {
  kubectl scale deployment -n "$KUBE_OVN_NS" ovn-central --replicas="$replicas"
}

# set ovn-central replicas to 0
replicas=$(kubectl get deployment -n "$KUBE_OVN_NS" ovn-central -o jsonpath='{.spec.replicas}')
if [ -z "$replicas" ]; then
  # Without the original count the deployment could not be scaled back up.
  echo "failed to get ovn-central replicas, abort restore"
  exit 1
fi
if ! kubectl scale deployment -n "$KUBE_OVN_NS" ovn-central --replicas=0; then
  echo "failed to scale ovn-central to 0, abort restore"
  exit 1
fi
echo "ovn-central original replicas is $replicas"

# collect the ovs-ovn pod on every master node
declare -a podNameArray
# Word splitting is intended here: one array element per IP / pod name.
nodeIps=($(kubectl get node -lkube-ovn/role=master -o wide | grep -v "INTERNAL-IP" | awk '{print $6}'))
podNames=($(kubectl get pod -n "$KUBE_OVN_NS" | grep ovs-ovn | awk '{print $1}'))
firstIP=${nodeIps[0]}
echo "first nodeIP is $firstIP"

for nodeIp in "${nodeIps[@]}"; do
  for pod in "${podNames[@]}"; do
    hostip=$(kubectl get pod -n "$KUBE_OVN_NS" "$pod" -o jsonpath='{.status.hostIP}')
    # Quoted so that an empty hostIP compares as unequal instead of
    # breaking the test expression.
    if [ "$nodeIp" = "$hostip" ]; then
      podNameArray+=("$pod")
      echo "ovs-ovn pod on node $nodeIp is $pod"
      break
    fi
  done
done

# backup ovn-nb db
echo "backup nb db file"
if ! ovsdb-tool cluster-to-standalone "$STANDALONE_DB" "$NB_DB"; then
  # Nothing has been moved yet, so the existing db files are left in place.
  echo "failed to convert $NB_DB to a standalone db, abort restore"
  restore_replicas
  exit 1
fi

# mv all db files
# Best effort: a pod where a file is already missing must not stop the restore.
# No -it: this script is run without a terminal.
for pod in "${podNameArray[@]}"; do
  kubectl exec -n "$KUBE_OVN_NS" "$pod" -- mv "$NB_DB" /tmp
  kubectl exec -n "$KUBE_OVN_NS" "$pod" -- mv "$SB_DB" /tmp
done

# restore db and replicas
echo "restore nb db file"
status=0
if ! mv "$STANDALONE_DB" "$NB_DB"; then
  echo "failed to move $STANDALONE_DB to $NB_DB"
  status=1
fi
# Always try to bring ovn-central back, even when the move failed.
if ! restore_replicas; then
  echo "failed to restore ovn-central replicas to $replicas"
  status=1
fi
if [ "$status" -ne 0 ]; then
  exit "$status"
fi
echo "finish restore nb db file and ovn-central replicas"
1 change: 1 addition & 0 deletions dist/images/update/1.7-1.8.2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,7 @@ rules:
- statefulsets
- daemonsets
- deployments
- deployments/scale
verbs:
- create
- delete
Expand Down
1 change: 1 addition & 0 deletions dist/images/update/1.8.2-1.9.0.sh
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,7 @@ rules:
- statefulsets
- daemonsets
- deployments
- deployments/scale
verbs:
- create
- delete
Expand Down
30 changes: 30 additions & 0 deletions pkg/ovnmonitor/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package ovnmonitor

import (
"os"
"os/exec"
"sync"
"time"

Expand All @@ -15,6 +16,8 @@ var (
appName = "ovn-monitor"
isClusterEnabled = true
tryConnectCnt = 0
checkNbDbCnt = 0
checkSbDbCnt = 0
)

// Exporter collects OVN data from the given server and exports them using
Expand Down Expand Up @@ -267,6 +270,33 @@ func (e *Exporter) exportOvnDBStatusGauge() {
metricDBStatus.WithLabelValues(e.Client.System.Hostname, database).Set(1)
} else {
metricDBStatus.WithLabelValues(e.Client.System.Hostname, database).Set(0)

switch database {
case "OVN_Northbound":
checkNbDbCnt++
if checkNbDbCnt < 6 {
klog.Warningf("Failed to get OVN NB DB status for %v times", checkNbDbCnt)
return
} else {
klog.Warningf("Failed to get OVN NB DB status for %v times, ready to restore OVN DB", checkNbDbCnt)
checkNbDbCnt = 0
}
case "OVN_Southbound":
checkSbDbCnt++
if checkSbDbCnt < 6 {
klog.Warningf("Failed to get OVN SB DB status for %v times", checkSbDbCnt)
return
} else {
klog.Warningf("Failed to get OVN SB DB status for %v times, ready to restore OVN DB", checkSbDbCnt)
checkSbDbCnt = 0
}
}

output, err := exec.Command("/bin/bash", "/kube-ovn/restore-ovn-nb-db.sh").CombinedOutput()
if err != nil {
klog.Errorf("Failed to restore OVN DB, err %v", err)
}
klog.Infof("restore OVN DB %v, process output %v", database, string(output))
}
}
}
1 change: 1 addition & 0 deletions yamls/ovn-dpdk.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ rules:
- statefulsets
- daemonsets
- deployments
- deployments/scale
verbs:
- get
- list
Expand Down
1 change: 1 addition & 0 deletions yamls/ovn-ha.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ rules:
- statefulsets
- daemonsets
- deployments
- deployments/scale
verbs:
- get
- list
Expand Down
1 change: 1 addition & 0 deletions yamls/ovn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ rules:
- statefulsets
- daemonsets
- deployments
- deployments/scale
verbs:
- create
- delete
Expand Down

0 comments on commit 44dae1f

Please sign in to comment.