From 497260efe4abfa2ceef95f9954b26dec57c9f7a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E7=A5=96=E5=BB=BA?= Date: Tue, 7 Mar 2023 17:00:04 +0800 Subject: [PATCH] ovs: change update strategy to RollingUpdate (#2422) --- .github/workflows/scheduled-e2e.yaml | 15 +++++++++--- Makefile | 8 +++++-- dist/images/install.sh | 30 ++++++++++++++++++++++-- dist/images/start-ovs.sh | 15 +++++++++--- kubeovn-helm/README.md | 6 +++++ kubeovn-helm/templates/ovn-CR.yaml | 7 ++++++ kubeovn-helm/templates/ovsovn-ds.yaml | 13 +++++++++- kubeovn-helm/templates/post-upgrade.yaml | 2 ++ kubeovn-helm/values.yaml | 1 + yamls/ovn-ha.yaml | 24 +++++++++++++++++-- yamls/ovn.yaml | 24 +++++++++++++++++-- 11 files changed, 130 insertions(+), 15 deletions(-) diff --git a/.github/workflows/scheduled-e2e.yaml b/.github/workflows/scheduled-e2e.yaml index dd3c6eec108..1a3227b5cfc 100644 --- a/.github/workflows/scheduled-e2e.yaml +++ b/.github/workflows/scheduled-e2e.yaml @@ -1072,8 +1072,8 @@ jobs: - name: Parse versions run: | - version_from=$(echo ${{ matrix.case }} | awk '{print $1}') - version_to=$(echo ${{ matrix.case }} | awk '{print $3}') + version_from=$(echo '${{ matrix.case }}' | awk '{print $1}') + version_to=$(echo '${{ matrix.case }}' | awk '{print $3}') echo "VERSION_FROM=$version_from" >> "$GITHUB_ENV" echo "VERSION_TO=$version_to" >> "$GITHUB_ENV" @@ -1121,7 +1121,16 @@ jobs: run: | version=$(grep -E '^VERSION="v([0-9]+\.){2}[0-9]+"$' dist/images/install.sh | head -n1 | awk -F= '{print $2}' | tr -d '"') docker pull kubeovn/kube-ovn:$version - VERSION=$version make kind-upgrade-chart + + restart_ovs=false + v1=$(printf "$VERSION_FROM\\nrelease-1.11" | sort -Vr | head -n1) + v2=$(printf "$VERSION_TO\\nrelease-1.12" | sort -Vr | head -n1) + if [ $v1 = "release-1.11" ]; then + if [ $VERSION_TO = "master" -o $VERSION_TO = $v2 ]; then + restart_ovs=true + fi + fi + CHART_UPGRADE_RESTART_OVS=$restart_ovs VERSION=$version make kind-upgrade-chart - name: Run E2E env: diff --git a/Makefile b/Makefile index f56b74770ad..df1f9bfd11f 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,8 @@ GOLDFLAGS = "-w -s -extldflags '-z now' -X github.com/kubeovn/kube-ovn/versions. CONTROL_PLANE_TAINTS = node-role.kubernetes.io/master node-role.kubernetes.io/control-plane +CHART_UPGRADE_RESTART_OVS=$(shell echo $${CHART_UPGRADE_RESTART_OVS:-false}) + MULTUS_IMAGE = ghcr.io/k8snetworkplumbingwg/multus-cni:stable MULTUS_YAML = https://raw.githubusercontent.com/k8snetworkplumbingwg/multus-cni/master/deployments/multus-daemonset.yml @@ -332,9 +334,11 @@ kind-upgrade-chart: kind-load-image helm upgrade kubeovn ./kubeovn-helm \ --set global.images.kubeovn.tag=$(VERSION) \ --set replicaCount=$$(echo $(OVN_DB_IPS) | awk -F ',' '{print NF}') \ - --set MASTER_NODES='$(OVN_DB_IPS)' + --set MASTER_NODES='$(OVN_DB_IPS)' \ + --set restart_ovs=$(CHART_UPGRADE_RESTART_OVS) kubectl rollout status deployment/ovn-central -n kube-system --timeout 300s - kubectl rollout status deployment/kube-ovn-controller -n kube-system --timeout 300s + kubectl rollout status daemonset/ovs-ovn -n kube-system --timeout 120s + kubectl rollout status deployment/kube-ovn-controller -n kube-system --timeout 120s kubectl rollout status daemonset/kube-ovn-cni -n kube-system --timeout 120s kubectl rollout status daemonset/kube-ovn-pinger -n kube-system --timeout 120s diff --git a/dist/images/install.sh b/dist/images/install.sh index 4b1c4ab3687..759344e5910 100755 --- a/dist/images/install.sh +++ b/dist/images/install.sh @@ -211,7 +211,7 @@ echo "-------------------------------" echo "" echo "[Step 2/6] Install OVN components" -addresses=$(kubectl get no -lkube-ovn/role=master --no-headers -o wide | awk '{print $6}' | tr \\n ',') +addresses=$(kubectl get no -lkube-ovn/role=master --no-headers -o wide | awk '{print $6}' | tr \\n ',' | sed 's/,$//') count=$(kubectl get no -lkube-ovn/role=master --no-headers | wc -l) echo "Install OVN DB in $addresses" @@ -1893,6 +1893,13 @@ rules: - create - patch - update + - apiGroups: + - apps + resources: + - controllerrevisions + verbs: + - get + - list - apiGroups: - coordination.k8s.io resources: @@ -2385,6 +2392,13 @@ rules: - create - patch - update + - apiGroups: + - apps + resources: + - controllerrevisions + verbs: + - get + - list - apiGroups: - coordination.k8s.io resources: @@ -2639,7 +2653,10 @@ spec: matchLabels: app: ovs updateStrategy: - type: OnDelete + type: RollingUpdate + rollingUpdate: + maxSurge: 100% + maxUnavailable: 0 template: metadata: labels: @@ -2673,6 +2690,14 @@ spec: valueFrom: fieldRef: fieldPath: status.podIP + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace - name: HW_OFFLOAD value: "$HW_OFFLOAD" - name: TUNNEL_TYPE @@ -2719,6 +2744,7 @@ spec: - bash - -c - LOG_ROTATE=true /kube-ovn/ovs-healthcheck.sh + initialDelaySeconds: 10 periodSeconds: 5 timeoutSeconds: 45 livenessProbe: diff --git a/dist/images/start-ovs.sh b/dist/images/start-ovs.sh index 2a99feb42d1..9d6ecd4359e 100755 --- a/dist/images/start-ovs.sh +++ b/dist/images/start-ovs.sh @@ -44,8 +44,17 @@ function quit { # Wait a while for prob ready. # As the timeout has been increased existing entry will not change to stale or delay at the moment sleep 5 - /usr/share/ovn/scripts/grace_stop_ovn_controller - /usr/share/openvswitch/scripts/ovs-ctl stop + + gen_name=$(kubectl -n $POD_NAMESPACE get pod $POD_NAME -o jsonpath='{.metadata.generateName}') + revision_hash=$(kubectl -n $POD_NAMESPACE get pod $POD_NAME -o jsonpath='{.metadata.labels.controller-revision-hash}') + revision=$(kubectl -n $POD_NAMESPACE get controllerrevision $gen_name$revision_hash -o jsonpath='{.revision}') + ds_name=${gen_name%-} + latest_revision=$(kubectl -n kube-system get controllerrevision --no-headers | awk '$2 == "daemonset.apps/'$ds_name'" {print $3}' | sort -nr | head -n1) + if [ "x$latest_revision" = "x$revision" ]; then + /usr/share/ovn/scripts/grace_stop_ovn_controller + /usr/share/openvswitch/scripts/ovs-ctl stop + fi + exit 0 } trap quit EXIT @@ -187,7 +196,7 @@ else fi # Start vswitchd. restart will automatically set/unset flow-restore-wait which is not what we want -/usr/share/openvswitch/scripts/ovs-ctl start --no-ovsdb-server --system-id=random --no-mlockall +/usr/share/openvswitch/scripts/ovs-ctl restart --no-ovsdb-server --system-id=random --no-mlockall /usr/share/openvswitch/scripts/ovs-ctl --protocol=udp --dport=6081 enable-protocol sleep 1 diff --git a/kubeovn-helm/README.md b/kubeovn-helm/README.md index 72dbb577230..0c6e622d0e5 100644 --- a/kubeovn-helm/README.md +++ b/kubeovn-helm/README.md @@ -18,3 +18,9 @@ $ helm install --debug kubeovn ./kubeovn-helm --set MASTER_NODES=${Node0},${Node # upgrade to this version $ helm upgrade --debug kubeovn ./kubeovn-helm --set MASTER_NODES=${Node0},${Node1},${Node2}, --set replicaCount=3 ``` + +If you are upgrading Kube-OVN from versions prior to v1.12, you need to set `restart_ovs` to `true`: + +```shell +$ helm upgrade --debug kubeovn ./kubeovn-helm --set MASTER_NODES=${Node0},${Node1},${Node2}, --set replicaCount=3 --set restart_ovs=true +``` diff --git a/kubeovn-helm/templates/ovn-CR.yaml b/kubeovn-helm/templates/ovn-CR.yaml index c7fb1bc24ee..131478cc443 100644 --- a/kubeovn-helm/templates/ovn-CR.yaml +++ b/kubeovn-helm/templates/ovn-CR.yaml @@ -88,6 +88,13 @@ rules: - create - patch - update + - apiGroups: + - apps + resources: + - controllerrevisions + verbs: + - get + - list - apiGroups: - coordination.k8s.io resources: diff --git a/kubeovn-helm/templates/ovsovn-ds.yaml b/kubeovn-helm/templates/ovsovn-ds.yaml index 33f262b911e..26865dc3d3c 100644 --- a/kubeovn-helm/templates/ovsovn-ds.yaml +++ b/kubeovn-helm/templates/ovsovn-ds.yaml @@ -12,7 +12,10 @@ spec: matchLabels: app: ovs updateStrategy: - type: OnDelete + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 template: metadata: labels: @@ -48,6 +51,14 @@ spec: valueFrom: fieldRef: fieldPath: status.podIP + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace - name: HW_OFFLOAD value: "{{- .Values.func.HW_OFFLOAD }}" - name: TUNNEL_TYPE diff --git a/kubeovn-helm/templates/post-upgrade.yaml b/kubeovn-helm/templates/post-upgrade.yaml index 57e8374a0fe..2245aa28ba2 100644 --- a/kubeovn-helm/templates/post-upgrade.yaml +++ b/kubeovn-helm/templates/post-upgrade.yaml @@ -1,3 +1,4 @@ +{{ if .Values.restart_ovs }} apiVersion: batch/v1 kind: Job metadata: @@ -64,3 +65,4 @@ spec: - name: kube-ovn-log hostPath: path: /var/log/kube-ovn +{{ end }} diff --git a/kubeovn-helm/values.yaml b/kubeovn-helm/values.yaml index 4a7b3e201ad..b53b9ac79b6 100644 --- a/kubeovn-helm/values.yaml +++ b/kubeovn-helm/values.yaml @@ -96,3 +96,4 @@ imagePullSecrets: [] nameOverride: "" fullnameOverride: "" +restart_ovs: false diff --git a/yamls/ovn-ha.yaml b/yamls/ovn-ha.yaml index 59512850a4a..76491350701 100644 --- a/yamls/ovn-ha.yaml +++ b/yamls/ovn-ha.yaml @@ -87,6 +87,13 @@ rules: - create - patch - update + - apiGroups: + - apps + resources: + - controllerrevisions + verbs: + - get + - list - apiGroups: - coordination.k8s.io resources: @@ -331,7 +338,10 @@ spec: matchLabels: app: ovs updateStrategy: - type: OnDelete + type: RollingUpdate + rollingUpdate: + maxSurge: 100% + maxUnavailable: 0 template: metadata: labels: @@ -365,6 +375,14 @@ spec: valueFrom: fieldRef: fieldPath: status.podIP + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace - name: HW_OFFLOAD value: "false" - name: KUBE_NODE_NAME @@ -398,7 +416,9 @@ spec: exec: command: - bash - - /kube-ovn/ovs-healthcheck.sh + - -c + - LOG_ROTATE=true /kube-ovn/ovs-healthcheck.sh + initialDelaySeconds: 10 periodSeconds: 5 timeoutSeconds: 45 livenessProbe: diff --git a/yamls/ovn.yaml b/yamls/ovn.yaml index fff3cb9119d..859cc9cb740 100644 --- a/yamls/ovn.yaml +++ b/yamls/ovn.yaml @@ -98,6 +98,13 @@ rules: - create - patch - update + - apiGroups: + - apps + resources: + - controllerrevisions + verbs: + - get + - list - apiGroups: - coordination.k8s.io resources: @@ -347,7 +354,10 @@ spec: matchLabels: app: ovs updateStrategy: - type: OnDelete + type: RollingUpdate + rollingUpdate: + maxSurge: 100% + maxUnavailable: 0 template: metadata: labels: @@ -381,6 +391,14 @@ spec: valueFrom: fieldRef: fieldPath: status.podIP + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace - name: HW_OFFLOAD value: "false" - name: KUBE_NODE_NAME @@ -416,7 +434,9 @@ spec: exec: command: - bash - - /kube-ovn/ovs-healthcheck.sh + - -c + - LOG_ROTATE=true /kube-ovn/ovs-healthcheck.sh + initialDelaySeconds: 10 periodSeconds: 5 timeoutSeconds: 45 livenessProbe: