Skip to content

Commit

Permalink
fix ovs-ovn startup/restart (#2467)
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangzujian committed Mar 14, 2023
1 parent b26784f commit d1cd3dd
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 108 deletions.
2 changes: 1 addition & 1 deletion Makefile
Expand Up @@ -623,7 +623,7 @@ kind-reload: kind-reload-ovs

.PHONY: kind-reload-ovs
kind-reload-ovs: kind-load-image
kubectl delete pod -n kube-system -l app=ovs
kubectl -n kube-system rollout restart ds ovs-ovn

.PHONY: kind-clean
kind-clean:
Expand Down
2 changes: 1 addition & 1 deletion dist/images/db_autocheck_script.sh
Expand Up @@ -59,7 +59,7 @@ restoreNB(){
echo "restore db file, operate in pod ${podNameArray[0]}"
kubectl exec -it -n $KUBE_OVN_NS ${podNameArray[0]} -- mv /etc/ovn/ovnnb_db_standalone.db /etc/ovn/ovnnb_db.db
kubectl scale deployment -n $KUBE_OVN_NS ovn-central --replicas=$replicas
kubectl -n kube-system delete pod -l app=ovs
kubectl -n kube-system rollout restart ds ovs-ovn
echo "finish restore db file and ovn-central replicas"
exit 0
}
Expand Down
1 change: 1 addition & 0 deletions dist/images/install.sh
Expand Up @@ -2979,6 +2979,7 @@ EOF
kubectl apply -f ovn-dpdk.yaml
fi
kubectl rollout status deployment/ovn-central -n kube-system --timeout 300s
kubectl rollout status daemonset/ovs-ovn -n kube-system --timeout 120s
echo "-------------------------------"
echo ""

Expand Down
6 changes: 3 additions & 3 deletions dist/images/kubectl-ko
Expand Up @@ -740,8 +740,8 @@ dbtool(){
done
echo "finish restore nb db file and ovn-central replicas"

echo "recreate ovs-ovn pods"
kubectl delete pod -n $KUBE_OVN_NS -l app=ovs
echo "restart ovs-ovn"
kubectl -n $KUBE_OVN_NS rollout restart ds ovs-ovn
;;
*)
echo "unknown action $action"
Expand Down Expand Up @@ -915,7 +915,7 @@ tuning(){
reload(){
kubectl delete pod -n kube-system -l app=ovn-central
kubectl rollout status deployment/ovn-central -n kube-system
kubectl delete pod -n kube-system -l app=ovs
kubectl rollout restart daemonset/ovs-ovn -n kube-system
kubectl delete pod -n kube-system -l app=kube-ovn-controller
kubectl rollout status deployment/kube-ovn-controller -n kube-system
kubectl delete pod -n kube-system -l app=kube-ovn-cni
Expand Down
4 changes: 2 additions & 2 deletions dist/images/restore-ovn-nb-db.sh
Expand Up @@ -53,5 +53,5 @@ mv /etc/ovn/ovnnb_db_standalone.db /etc/ovn/ovnnb_db.db
kubectl scale deployment -n $KUBE_OVN_NS ovn-central --replicas=$replicas
echo "finish restore nb db file and ovn-central replicas"

echo "recreate ovs-ovn pods"
kubectl delete pod -n $KUBE_OVN_NS -l app=ovs
echo "restart ovs-ovn"
kubectl -n $KUBE_OVN_NS rollout restart ds ovs-ovn
90 changes: 12 additions & 78 deletions dist/images/start-ovs.sh
Expand Up @@ -34,25 +34,25 @@ cat /proc/cmdline"
exit 1
fi

function quit {
set +e
for netns in /var/run/netns/*; do
nsenter --net=$netns sysctl -w net.ipv4.neigh.eth0.base_reachable_time_ms=180000;
nsenter --net=$netns sysctl -w net.ipv4.neigh.eth0.gc_stale_time=180;
done
# If an ARP entry is in the stale or delay state, stopping vswitchd will cause the probe to fail.
# Wait a while for the probe to become ready.
# Because the timeouts have just been increased, existing entries will not change to stale or delay in the meantime.
sleep 5
#######################################
# Check whether two processes report identical cgroup membership by comparing
# the MD5 digests of their /proc/<id>/cgroup files.
# Arguments:
#   $1 - first process: any entry name valid under /proc (a PID, or "self")
#   $2 - second process, same form as $1
# Returns:
#   0 if both cgroup files were hashed and the digests are equal;
#   non-zero otherwise (missing/empty argument, unreadable file, or mismatch).
#######################################
function cgroup_match {
  local first_hash second_hash

  # An empty identifier would resolve to /proc//cgroup; fail early instead of
  # invoking md5sum on a path that cannot exist.
  if [[ -z "${1:-}" || -z "${2:-}" ]]; then
    return 1
  fi

  first_hash=$(md5sum "/proc/$1/cgroup" | awk '{print $1}')
  second_hash=$(md5sum "/proc/$2/cgroup" | awk '{print $1}')

  # An empty digest means md5sum produced no output for the first file, so the
  # comparison must not succeed just because both sides are empty.
  [[ -n "$first_hash" && "$first_hash" == "$second_hash" ]]
}

function quit {
gen_name=$(kubectl -n $POD_NAMESPACE get pod $POD_NAME -o jsonpath='{.metadata.generateName}')
revision_hash=$(kubectl -n $POD_NAMESPACE get pod $POD_NAME -o jsonpath='{.metadata.labels.controller-revision-hash}')
revision=$(kubectl -n $POD_NAMESPACE get controllerrevision $gen_name$revision_hash -o jsonpath='{.revision}')
ds_name=${gen_name%-}
latest_revision=$(kubectl -n kube-system get controllerrevision --no-headers | awk '$2 == "daemonset.apps/'$ds_name'" {print $3}' | sort -nr | head -n1)
if [ "x$latest_revision" = "x$revision" ]; then
/usr/share/ovn/scripts/grace_stop_ovn_controller
/usr/share/openvswitch/scripts/ovs-ctl stop
# stop ovn-controller/ovs only when the processes are in the same cgroup
pid=$(/usr/share/ovn/scripts/ovn-ctl status_controller | awk '{print $NF}')
if cgroup_match $pid self; then
/usr/share/ovn/scripts/grace_stop_ovn_controller
/usr/share/openvswitch/scripts/ovs-ctl stop
fi
fi

exit 0
Expand Down Expand Up @@ -152,49 +152,6 @@ function exchange_link_names() {

exchange_link_names

#######################################
# Check that every untyped OVS port which is also a route device towards one
# of the OVN DB addresses is enslaved to "ovs-system".
#
# The route device for each address in OVN_DB_IPS is collected with
# `ip route get`. Then every port of every bridge tagged
# external-ids:vendor=kube-ovn is inspected: ports that are not among those
# devices, or whose interface type is not the empty string, are ignored.
#
# Globals:
#   OVN_DB_IPS (read) - comma-separated list of addresses
# Arguments:
#   None
# Returns:
#   0 if no inspected port fails the check,
#   1 as soon as `ip link show` for such a port lacks "master ovs-system".
#######################################
function wait_flows_pre_check() {
  local -a db_ips bridges ports
  local db_ip br port port_type
  local devices=""

  # Turn the comma-separated list into words and let read split them.
  read -r -a db_ips <<< "${OVN_DB_IPS//,/ }"
  for db_ip in "${db_ips[@]}"; do
    devices="$devices $(ip route get "$db_ip" | grep -oE 'dev .+' | awk '{print $2}')"
  done

  # read -d '' consumes the whole output and returns non-zero at EOF, hence
  # the "|| true"; the words are still split on whitespace into the array
  # without being subject to pathname expansion.
  read -r -d '' -a bridges \
    < <(ovs-vsctl --no-heading --columns=name find bridge external-ids:vendor=kube-ovn) || true
  for br in "${bridges[@]}"; do
    read -r -d '' -a ports < <(ovs-vsctl list-ports "$br") || true
    for port in "${ports[@]}"; do
      # Only ports used as a route device towards the OVN DBs are relevant.
      # -F matches the port name literally, so "." in a name such as
      # "eth0.100" is not treated as a regex wildcard.
      if ! grep -qwF -- "$port" <<< "$devices"; then
        continue
      fi

      # ovs-vsctl prints an empty type as the two-character string "".
      port_type=$(ovs-vsctl --no-heading --columns=type find interface name="$port")
      if [[ "$port_type" != '""' ]]; then
        continue
      fi

      if ! ip link show "$port" | grep -qw "master ovs-system"; then
        return 1
      fi
    done
  done

  return 0
}

# Record the outcome of the pre-check: 0 means "wait for flows", 1 means
# "skip waiting".
if wait_flows_pre_check; then
  skip_wait_flows=0
else
  skip_wait_flows=1
fi

case "$skip_wait_flows" in
  0)
    # When ovs-vswitchd starts with this value set to true, it will neither
    # flush nor expire previously set datapath flows, nor will it send or
    # receive any packets to or from the datapath.
    # Please check ovs-vswitchd.conf.db.5.txt
    ovs-vsctl --no-wait set open_vswitch . other_config:flow-restore-wait="true"
    ;;
  *)
    ovs-vsctl --no-wait set open_vswitch . other_config:flow-restore-wait="false"
    ;;
esac

# Start vswitchd. restart will automatically set/unset flow-restore-wait which is not what we want
/usr/share/openvswitch/scripts/ovs-ctl restart --no-ovsdb-server --system-id=random --no-mlockall
/usr/share/openvswitch/scripts/ovs-ctl --protocol=udp --dport=6081 enable-protocol
Expand Down Expand Up @@ -300,28 +257,5 @@ else
/usr/share/ovn/scripts/ovn-ctl --ovn-controller-ssl-key=/var/run/tls/key --ovn-controller-ssl-cert=/var/run/tls/cert --ovn-controller-ssl-ca-cert=/var/run/tls/cacert restart_controller
fi

# Runs only when skip_wait_flows is 0.
if [ $skip_wait_flows -eq 0 ]; then
# Wait for ovn-controller to finish its initial flow computation and push the
# flows to vswitchd, then set flow-restore-wait to "false" to tell vswitchd to
# start processing flows.
# Errors are tolerated while polling (set +e) so that a failing ovs-ofctl call
# does not abort the script.
set +e
# The line count of the br-int flow dump is used as the number of flows.
flow_num=$(ovs-ofctl dump-flows br-int | wc -l)
# Poll once per second until the count exceeds FLOW_LIMIT.
# NOTE(review): FLOW_LIMIT is not defined in this part of the file; presumably
# it is set earlier or comes from the environment. Confirm.
while [ $flow_num -le $FLOW_LIMIT ]
do
echo "$flow_num flows now, waiting for ovs-vswitchd flow ready"
sleep 1
flow_num=$(ovs-ofctl dump-flows br-int | wc -l)
done
set -e

ovs-vsctl --no-wait set open_vswitch . other_config:flow-restore-wait="false"
fi

# Set the eth0 neighbour timers in every network namespace listed under
# /var/run/netns: base_reachable_time_ms=30000 and gc_stale_time=60.
# This is best-effort: errors are ignored (set +e) so that a namespace where
# nsenter or sysctl fails does not abort the script.
set +e
for netns in /var/run/netns/*; do
  # When nothing matches, the glob stays as the literal pattern; skip it
  # instead of calling nsenter on a path that does not exist.
  [ -e "$netns" ] || continue
  nsenter --net="$netns" sysctl -w net.ipv4.neigh.eth0.base_reachable_time_ms=30000
  nsenter --net="$netns" sysctl -w net.ipv4.neigh.eth0.gc_stale_time=60
done
set -e

chmod 600 /etc/openvswitch/*
tail --follow=name --retry /var/log/ovn/ovn-controller.log
46 changes: 23 additions & 23 deletions docs/performance-tuning.md
Expand Up @@ -18,7 +18,7 @@ We use `qperf -t 60 <server ip> -ub -oo msg_size:1 -vu tcp_lat tcp_bw udp_lat ud
for tcp/udp latency and throughput and compare with host network performance as baseline.

| Type | tcp_lat (us) | udp_lat (us) | tcp_bw (Mb/s) | udp_bw(Mb/s) |
| ------------------ | -------------| -------------| --------------| -------------|
| ------------------ | ------------ | ------------ | ------------- | ------------ |
| Kube-OVN Default | 25.7 | 22.9 | 27.1 | 1.59 |
| Kube-OVN Optimized | 13.9 | 12.9 | 27.6 | 5.57 |
| HOST Network | 13.1 | 12.4 | 28.2 | 6.02 |
Expand All @@ -35,33 +35,33 @@ In a different environment set, we compare the performance between optimized Kub

`qperf -t 60 <server ip> -ub -oo msg_size:1 -vu tcp_lat tcp_bw udp_lat udp_bw`

| Type | tcp_lat (us) | udp_lat (us) | tcp_bw (Mb/s) | udp_bw(Mb/s) |
| ------------------ | -------------| -------------| --------------| -------------|
| Kube-OVN Overlay | 15.2 | 14.6 | 23.6 | 2.65 |
| Kube-OVN Underlay | 14.3 | 13.8 | 24.2 | 3.46 |
| Calico IPIP | 21.4 | 20.2 | 23.6 | 1.18 |
| Calico NoEncap | 19.3 | 16.9 | 23.6 | 1.76 |
| HOST Network | 16.6 | 15.4 | 24.8 | 2.64 |
| Type | tcp_lat (us) | udp_lat (us) | tcp_bw (Mb/s) | udp_bw(Mb/s) |
| ----------------- | ------------ | ------------ | ------------- | ------------ |
| Kube-OVN Overlay | 15.2 | 14.6 | 23.6 | 2.65 |
| Kube-OVN Underlay | 14.3 | 13.8 | 24.2 | 3.46 |
| Calico IPIP | 21.4 | 20.2 | 23.6 | 1.18 |
| Calico NoEncap | 19.3 | 16.9 | 23.6 | 1.76 |
| HOST Network | 16.6 | 15.4 | 24.8 | 2.64 |

`qperf -t 60 <server ip> -ub -oo msg_size:1K -vu tcp_lat tcp_bw udp_lat udp_bw`

| Type | tcp_lat (us) | udp_lat (us) | tcp_bw (Gb/s) | udp_bw(Gb/s) |
| ------------------ | -------------| -------------| --------------| -------------|
| Kube-OVN Overlay | 16.5 | 15.8 | 10.2 | 2.77 |
| Kube-OVN Underlay | 15.9 | 14.5 | 9.6 | 3.22 |
| Calico IPIP | 22.5 | 21.5 | 1.45 | 1.14 |
| Calico NoEncap | 19.4 | 18.3 | 3.76 | 1.63 |
| HOST Network | 18.1 | 16.6 | 9.32 | 2.66 |
| Type | tcp_lat (us) | udp_lat (us) | tcp_bw (Gb/s) | udp_bw(Gb/s) |
| ----------------- | ------------ | ------------ | ------------- | ------------ |
| Kube-OVN Overlay | 16.5 | 15.8 | 10.2 | 2.77 |
| Kube-OVN Underlay | 15.9 | 14.5 | 9.6 | 3.22 |
| Calico IPIP | 22.5 | 21.5 | 1.45 | 1.14 |
| Calico NoEncap | 19.4 | 18.3 | 3.76 | 1.63 |
| HOST Network | 18.1 | 16.6 | 9.32 | 2.66 |

`qperf -t 60 <server ip> -ub -oo msg_size:4K -vu tcp_lat tcp_bw udp_lat udp_bw`

| Type | tcp_lat (us) | udp_lat (us) | tcp_bw (Gb/s) | udp_bw(Gb/s) |
| ------------------ | -------------| -------------| --------------| -------------|
| Kube-OVN Overlay | 34.7 | 41.6 | 16.0 | 9.23 |
| Kube-OVN Underlay | 32.6 | 44 | 15.1 | 6.71 |
| Calico IPIP | 44.8 | 52.9 | 2.94 | 3.26 |
| Calico NoEncap | 40 | 49.6 | 6.56 | 4.19 |
| HOST Network | 35.9 | 45.9 | 14.6 | 5.59 |
| Type | tcp_lat (us) | udp_lat (us) | tcp_bw (Gb/s) | udp_bw(Gb/s) |
| ----------------- | ------------ | ------------ | ------------- | ------------ |
| Kube-OVN Overlay | 34.7 | 41.6 | 16.0 | 9.23 |
| Kube-OVN Underlay | 32.6 | 44 | 15.1 | 6.71 |
| Calico IPIP | 44.8 | 52.9 | 2.94 | 3.26 |
| Calico NoEncap | 40 | 49.6 | 6.56 | 4.19 |
| HOST Network | 35.9 | 45.9 | 14.6 | 5.59 |

This benchmark is for reference only; the results may vary dramatically depending on the hardware and software setup.
Optimization for packets with big size and underlay latency are still in progress, we will publish the optimization
Expand Down Expand Up @@ -263,5 +263,5 @@ Unfortunately, this tunnel type is not embedded in kernel, you have to compile O
```bash
kubectl set env daemonset/ovs-ovn -n kube-system TUNNEL_TYPE=stt

kubectl delete pod -n kube-system -lapp=ovs
kubectl -n kube-system rollout restart ds ovs-ovn
```

0 comments on commit d1cd3dd

Please sign in to comment.