diff --git a/.github/workflows/build-x86-image.yaml b/.github/workflows/build-x86-image.yaml index ff32854c188..b879570661f 100644 --- a/.github/workflows/build-x86-image.yaml +++ b/.github/workflows/build-x86-image.yaml @@ -537,6 +537,9 @@ jobs: name: k8s-conformance-e2e-${{ matrix.ip-family }}-${{ matrix.mode }}-ko-log path: k8s-conformance-e2e-${{ matrix.ip-family }}-${{ matrix.mode }}-ko-log.tar.gz + - name: Check kube ovn pod restarts + run: make check-kube-ovn-pod-restarts + - name: Check valgrind result run: | kubectl -n kube-system rollout restart ds ovs-ovn @@ -694,6 +697,9 @@ jobs: name: k8s-netpol-e2e-${{ matrix.ip-family }}-ko-log path: k8s-netpol-e2e-${{ matrix.ip-family }}-ko-log.tar.gz + - name: Check kube ovn pod restarts + run: make check-kube-ovn-pod-restarts + - name: Check valgrind result run: | kubectl -n kube-system rollout restart ds ovs-ovn @@ -841,6 +847,9 @@ jobs: name: k8s-netpol-legacy-e2e-${{ matrix.ip-family }}-ko-log path: k8s-netpol-legacy-e2e-${{ matrix.ip-family }}-ko-log.tar.gz + - name: Check kube ovn pod restarts + run: make check-kube-ovn-pod-restarts + - name: Check valgrind result run: | kubectl -n kube-system rollout restart deploy ovn-central @@ -972,6 +981,9 @@ jobs: name: cyclonus-netpol-e2e-${{ matrix.ip-family }}-ko-log path: cyclonus-netpol-e2e-${{ matrix.ip-family }}-ko-log.tar.gz + - name: Check kube ovn pod restarts + run: make check-kube-ovn-pod-restarts + - name: Check valgrind result run: | kubectl -n kube-system rollout restart ds ovs-ovn @@ -1130,6 +1142,9 @@ jobs: name: kube-ovn-conformance-e2e-${{ matrix.mode }}-${{ matrix.ip-family }}-ko-log path: kube-ovn-conformance-e2e-${{ matrix.mode }}-${{ matrix.ip-family }}-ko-log.tar.gz + - name: Check kube ovn pod restarts + run: make check-kube-ovn-pod-restarts + - name: Check valgrind result run: | kubectl -n kube-system rollout restart ds ovs-ovn @@ -1283,6 +1298,9 @@ jobs: name: kube-ovn-ic-conformance-e2e-${{ matrix.ip-family }}-ko-log path: 
kube-ovn-ic-conformance-e2e-${{ matrix.ip-family }}-ko-log.tar.gz + - name: Check kube ovn pod restarts + run: make check-kube-ovn-pod-restarts + multus-conformance-e2e: name: Multus Conformance E2E needs: @@ -1401,6 +1419,9 @@ jobs: name: multus-conformance-e2e-${{ matrix.ip-family }}-ko-log path: multus-conformance-e2e-${{ matrix.ip-family }}-ko-log.tar.gz + - name: Check kube ovn pod restarts + run: make check-kube-ovn-pod-restarts + chart-test: name: Chart Installation/Uninstallation Test needs: build-kube-ovn @@ -1490,6 +1511,9 @@ jobs: - name: Install Kube-OVN run: make kind-install-underlay-logical-gateway-dual + - name: Check kube ovn pod restarts + run: make check-kube-ovn-pod-restarts + - name: Cleanup run: sh -x dist/images/cleanup.sh @@ -1538,6 +1562,9 @@ jobs: ENABLE_LB: "false" run: make kind-install + - name: Check kube ovn pod restarts + run: make check-kube-ovn-pod-restarts + - name: Cleanup run: sh -x dist/images/cleanup.sh @@ -1586,6 +1613,9 @@ jobs: ENABLE_NP: "false" run: make kind-install + - name: Check kube ovn pod restarts + run: make check-kube-ovn-pod-restarts + - name: Cleanup run: sh -x dist/images/cleanup.sh @@ -1691,6 +1721,9 @@ jobs: E2E_BRANCH: ${{ github.base_ref || github.ref_name }} run: make kube-ovn-lb-svc-conformance-e2e + - name: Check kube ovn pod restarts + run: make check-kube-ovn-pod-restarts + webhook-e2e: name: Webhook E2E needs: @@ -1799,6 +1832,9 @@ jobs: name: webhook-e2e-ko-log path: webhook-e2e-ko-log.tar.gz + - name: Check kube ovn pod restarts + run: make check-kube-ovn-pod-restarts + installation-compatibility-test: name: Installation Compatibility Test needs: build-kube-ovn @@ -1855,6 +1891,9 @@ jobs: name: installation-compatibility-test-ko-log path: installation-compatibility-test-ko-log.tar.gz + - name: Check kube ovn pod restarts + run: make check-kube-ovn-pod-restarts + - name: Cleanup run: sh -x dist/images/cleanup.sh @@ -1974,6 +2013,9 @@ jobs: name: cilium-chaining-e2e-ko-log path: 
cilium-chaining-e2e-ko-log.tar.gz + - name: Check kube ovn pod restarts + run: make check-kube-ovn-pod-restarts + - name: Cleanup run: sh -x dist/images/cleanup.sh @@ -2102,6 +2144,9 @@ jobs: name: kube-ovn-ha-e2e-${{ matrix.ssl }}-${{ matrix.bind-local }}-${{ matrix.ip-family }}-ko-log path: kube-ovn-ha-e2e-${{ matrix.ssl }}-${{ matrix.bind-local }}-${{ matrix.ip-family }}-ko-log.tar.gz + - name: Check kube ovn pod restarts + run: make check-kube-ovn-pod-restarts + - name: Cleanup run: sh -x dist/images/cleanup.sh @@ -2192,6 +2237,9 @@ jobs: name: kube-ovn-submariner-conformance-e2e-ko-log path: kube-ovn-submariner-conformance-e2e-ko-log.tar.gz + - name: Check kube ovn pod restarts + run: make check-kube-ovn-pod-restarts + - name: Cleanup run: sh -x dist/images/cleanup.sh @@ -2310,6 +2358,9 @@ jobs: name: iptables-vpc-nat-gw-conformance-e2e-ko-log path: iptables-vpc-nat-gw-conformance-e2e-ko-log.tar.gz + - name: Check kube ovn pod restarts + run: make check-kube-ovn-pod-restarts + ovn-vpc-nat-gw-conformance-e2e: name: OVN VPC NAT Gateway E2E needs: @@ -2410,6 +2461,22 @@ jobs: E2E_BRANCH: ${{ github.base_ref || github.ref_name }} run: make ovn-vpc-nat-gw-conformance-e2e + - name: kubectl ko log + if: failure() + run: | + make kubectl-ko-log + mv kubectl-ko-log.tar.gz ovn-vpc-nat-gw-conformance-e2e-ko-log.tar.gz + + - name: upload kubectl ko log + uses: actions/upload-artifact@v4 + if: failure() + with: + name: ovn-vpc-nat-gw-conformance-e2e-ko-log + path: ovn-vpc-nat-gw-conformance-e2e-ko-log.tar.gz + + - name: Check kube ovn pod restarts + run: make check-kube-ovn-pod-restarts + push: name: Push Images needs: diff --git a/Makefile b/Makefile index 1b172020cbf..1535e0bbea0 100644 --- a/Makefile +++ b/Makefile @@ -871,6 +871,10 @@ kind-clean-ovn-ic: kind-clean kind-clean-ovn-submariner: kind-clean kind delete cluster --name=kube-ovn1 +.PHONY: check-kube-ovn-pod-restarts +check-kube-ovn-pod-restarts: + bash hack/ci-check-crash.sh + .PHONY: uninstall uninstall: bash 
dist/images/cleanup.sh
diff --git a/hack/ci-check-crash.sh b/hack/ci-check-crash.sh
new file mode 100644
index 00000000000..ffb7aa56ea7
--- /dev/null
+++ b/hack/ci-check-crash.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+#
+# Fail the CI job when any container of a kube-ovn pod has restarted.
+#
+# Lists the pods labelled component=network in kube-system, reads the restart
+# count of every container in each pod and, for each restarted container,
+# prints the logs of its previous instance. Exits 1 when a restart was found
+# or the pod state could not be read, 0 otherwise.
+
+namespace="kube-system"
+# jsonpath template printing one "<container> <restartCount>" line per container
+container_restarts='{range .status.containerStatuses[*]}{.name}{" "}{.restartCount}{"\n"}{end}'
+
+# A failed listing must fail the check: an empty pod list would otherwise
+# be indistinguishable from "nothing restarted".
+if ! pods=$(kubectl get pod -n "$namespace" -l component=network -o name); then
+  echo "failed to list pods in namespace $namespace" >&2
+  exit 1
+fi
+
+exit_code=0
+for pod in $pods; do
+  if ! statuses=$(kubectl get -n "$namespace" "$pod" -o jsonpath="$container_restarts"); then
+    echo "failed to get container statuses of $pod" >&2
+    exit_code=1
+    continue
+  fi
+
+  # Feed the loop from a here-string rather than a pipe so it runs in this
+  # shell and exit_code survives it.
+  while read -r container restartCount; do
+    # Skip the blank line left by a pod that reports no container statuses.
+    [ -n "$container" ] || continue
+    if [ "${restartCount:-0}" -gt 0 ]; then
+      exit_code=1
+      echo "$pod (container $container) restarted $restartCount time(s). Logs of the previous instance:"
+      kubectl logs -p -n "$namespace" "$pod" -c "$container"
+    fi
+  done <<< "$statuses"
+done
+
+exit $exit_code