diff --git a/charts/metallb/templates/speaker.yaml b/charts/metallb/templates/speaker.yaml index 77a0f668d3d..b671a66f3fe 100644 --- a/charts/metallb/templates/speaker.yaml +++ b/charts/metallb/templates/speaker.yaml @@ -165,6 +165,8 @@ spec: emptyDir: {} - name: metrics emptyDir: {} + - name: frr-liveness + emptyDir: {} {{- if .Values.prometheus.speakerMetricsTLSSecret }} - name: metrics-certs secret: @@ -190,6 +192,13 @@ spec: volumeMounts: - name: reloader mountPath: /etc/frr_reloader + # Copies the liveness probe script from the speaker image to the volume mounted by the frr container. + - name: cp-liveness + image: {{ .Values.speaker.image.repository }}:{{ .Values.speaker.image.tag | default .Chart.AppVersion }} + command: ["/bin/sh", "-c", "cp -f /liveness.sh /etc/frr_liveness/"] + volumeMounts: + - name: frr-liveness + mountPath: /etc/frr_liveness # Copies the metrics exporter - name: cp-metrics image: {{ .Values.speaker.image.repository }}:{{ .Values.speaker.image.tag | default .Chart.AppVersion }} @@ -323,6 +332,8 @@ spec: mountPath: /var/run/frr - name: frr-conf mountPath: /etc/frr + - name: frr-liveness + mountPath: /etc/frr_liveness # The command is FRR's default entrypoint & waiting for the log file to appear and tailing it. # If the log file isn't created in 60 seconds the tail fails and the container is restarted. # This workaround is needed to have the frr logs as part of kubectl logs -c frr < speaker_pod_name >. @@ -341,6 +352,16 @@ spec: resources: {{- toYaml . 
| nindent 12 }} {{- end }} + livenessProbe: + exec: + command: ["/etc/frr_liveness/liveness.sh"] + periodSeconds: 5 + failureThreshold: 3 + startupProbe: + exec: + command: ["/etc/frr_liveness/liveness.sh"] + failureThreshold: 30 + periodSeconds: 5 - name: reloader image: {{ .Values.speaker.frr.image.repository }}:{{ .Values.speaker.frr.image.tag | default .Chart.AppVersion }} {{- if .Values.speaker.frr.image.pullPolicy }} diff --git a/config/frr/speaker-patch.yaml b/config/frr/speaker-patch.yaml index a673f437849..6914122a3c2 100644 --- a/config/frr/speaker-patch.yaml +++ b/config/frr/speaker-patch.yaml @@ -23,6 +23,8 @@ spec: emptyDir: {} - name: metrics emptyDir: {} + - name: frr-liveness + emptyDir: {} initContainers: # Copies the initial config files with the right permissions to the shared volume. - name: cp-frr-files @@ -43,6 +45,13 @@ spec: volumeMounts: - name: reloader mountPath: /etc/frr_reloader + # Copies the liveness probe script from the speaker image to the volume mounted by the frr container. + - name: cp-liveness + image: quay.io/metallb/speaker:main + command: ["/bin/sh", "-c", "cp -f /liveness.sh /etc/frr_liveness/"] + volumeMounts: + - name: frr-liveness + mountPath: /etc/frr_liveness # Copies the metrics exporter - name: cp-metrics image: quay.io/metallb/speaker:main @@ -64,6 +73,8 @@ spec: mountPath: /var/run/frr - name: frr-conf mountPath: /etc/frr + - name: frr-liveness + mountPath: /etc/frr_liveness # The command is FRR's default entrypoint & waiting for the log file to appear and tailing it. # If the log file isn't created in 60 seconds the tail fails and the container is restarted. # This workaround is needed to have the frr logs as part of kubectl logs -c frr < speaker_pod_name >. 
@@ -78,6 +89,16 @@ spec: attempts=$(( $attempts + 1 )) done tail -f /etc/frr/frr.log + livenessProbe: + exec: + command: ["/etc/frr_liveness/liveness.sh"] + periodSeconds: 5 + failureThreshold: 3 + startupProbe: + exec: + command: ["/etc/frr_liveness/liveness.sh"] + failureThreshold: 30 + periodSeconds: 5 - name: reloader image: frrouting/frr:v7.5.1 command: ["/etc/frr_reloader/frr-reloader.sh"] diff --git a/config/manifests/metallb-frr-prometheus.yaml b/config/manifests/metallb-frr-prometheus.yaml index 6924719529b..2e0bfa91c2f 100644 --- a/config/manifests/metallb-frr-prometheus.yaml +++ b/config/manifests/metallb-frr-prometheus.yaml @@ -2104,6 +2104,12 @@ spec: - name: TINI_SUBREAPER value: "true" image: frrouting/frr:v7.5.1 + livenessProbe: + exec: + command: + - /etc/frr_liveness/liveness.sh + failureThreshold: 3 + periodSeconds: 5 name: frr securityContext: capabilities: @@ -2112,11 +2118,19 @@ spec: - NET_RAW - SYS_ADMIN - NET_BIND_SERVICE + startupProbe: + exec: + command: + - /etc/frr_liveness/liveness.sh + failureThreshold: 30 + periodSeconds: 5 volumeMounts: - mountPath: /var/run/frr name: frr-sockets - mountPath: /etc/frr name: frr-conf + - mountPath: /etc/frr_liveness + name: frr-liveness - command: - /etc/frr_reloader/frr-reloader.sh image: frrouting/frr:v7.5.1 @@ -2237,6 +2251,15 @@ spec: volumeMounts: - mountPath: /etc/frr_reloader name: reloader + - command: + - /bin/sh + - -c + - cp -f /liveness.sh /etc/frr_liveness/ + image: quay.io/metallb/speaker:main + name: cp-liveness + volumeMounts: + - mountPath: /etc/frr_liveness + name: frr-liveness - command: - /bin/sh - -c @@ -2270,6 +2293,8 @@ spec: name: reloader - emptyDir: {} name: metrics + - emptyDir: {} + name: frr-liveness - name: memberlist secret: defaultMode: 420 diff --git a/config/manifests/metallb-frr.yaml b/config/manifests/metallb-frr.yaml index 7f999eaf308..1aa28dcc25f 100644 --- a/config/manifests/metallb-frr.yaml +++ b/config/manifests/metallb-frr.yaml @@ -1927,6 +1927,12 @@ spec: - 
name: TINI_SUBREAPER value: "true" image: frrouting/frr:v7.5.1 + livenessProbe: + exec: + command: + - /etc/frr_liveness/liveness.sh + failureThreshold: 3 + periodSeconds: 5 name: frr securityContext: capabilities: @@ -1935,11 +1941,19 @@ spec: - NET_RAW - SYS_ADMIN - NET_BIND_SERVICE + startupProbe: + exec: + command: + - /etc/frr_liveness/liveness.sh + failureThreshold: 30 + periodSeconds: 5 volumeMounts: - mountPath: /var/run/frr name: frr-sockets - mountPath: /etc/frr name: frr-conf + - mountPath: /etc/frr_liveness + name: frr-liveness - command: - /etc/frr_reloader/frr-reloader.sh image: frrouting/frr:v7.5.1 @@ -2060,6 +2074,15 @@ spec: volumeMounts: - mountPath: /etc/frr_reloader name: reloader + - command: + - /bin/sh + - -c + - cp -f /liveness.sh /etc/frr_liveness/ + image: quay.io/metallb/speaker:main + name: cp-liveness + volumeMounts: + - mountPath: /etc/frr_liveness + name: frr-liveness - command: - /bin/sh - -c @@ -2093,6 +2116,8 @@ spec: name: reloader - emptyDir: {} name: metrics + - emptyDir: {} + name: frr-liveness - name: memberlist secret: defaultMode: 420 diff --git a/frr-tools/liveness/liveness.sh b/frr-tools/liveness/liveness.sh new file mode 100755 index 00000000000..235a2ed5011 --- /dev/null +++ b/frr-tools/liveness/liveness.sh @@ -0,0 +1,12 @@ +#!/bin/bash +set -e + +EXPECTED_DAEMONS=" bfdd bgpd staticd watchfrr zebra " +DAEMONS=$(vtysh -c "show daemons" | tr " " "\n" | sort | tr "\n" " ") + +if [ "$DAEMONS" != "$EXPECTED_DAEMONS" ]; then + echo "Did not find all the expected daemons [$DAEMONS]" + exit 1 +fi + + diff --git a/frr-metrics/collector/bfd.go b/frr-tools/metrics/collector/bfd.go similarity index 100% rename from frr-metrics/collector/bfd.go rename to frr-tools/metrics/collector/bfd.go diff --git a/frr-metrics/collector/bfd_test.go b/frr-tools/metrics/collector/bfd_test.go similarity index 100% rename from frr-metrics/collector/bfd_test.go rename to frr-tools/metrics/collector/bfd_test.go diff --git a/frr-metrics/collector/bgp.go 
b/frr-tools/metrics/collector/bgp.go similarity index 100% rename from frr-metrics/collector/bgp.go rename to frr-tools/metrics/collector/bgp.go diff --git a/frr-metrics/collector/bgp_test.go b/frr-tools/metrics/collector/bgp_test.go similarity index 100% rename from frr-metrics/collector/bgp_test.go rename to frr-tools/metrics/collector/bgp_test.go diff --git a/frr-metrics/collector/vtysh.go b/frr-tools/metrics/collector/vtysh.go similarity index 100% rename from frr-metrics/collector/vtysh.go rename to frr-tools/metrics/collector/vtysh.go diff --git a/frr-metrics/exporter.go b/frr-tools/metrics/exporter.go similarity index 97% rename from frr-metrics/exporter.go rename to frr-tools/metrics/exporter.go index 506994c79b4..dc452aab467 100644 --- a/frr-metrics/exporter.go +++ b/frr-tools/metrics/exporter.go @@ -16,7 +16,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/exporter-toolkit/web" - "go.universe.tf/metallb/frr-metrics/collector" + "go.universe.tf/metallb/frr-tools/metrics/collector" "go.universe.tf/metallb/internal/logging" "go.universe.tf/metallb/internal/version" ) diff --git a/frr-reloader/frr-reloader.sh b/frr-tools/reloader/frr-reloader.sh similarity index 100% rename from frr-reloader/frr-reloader.sh rename to frr-tools/reloader/frr-reloader.sh diff --git a/speaker/Dockerfile b/speaker/Dockerfile index 75ba1b514ff..3e212d80caa 100644 --- a/speaker/Dockerfile +++ b/speaker/Dockerfile @@ -12,7 +12,7 @@ RUN go mod download # Copy speaker COPY speaker/*.go speaker/ # Copy frr-metrics -COPY frr-metrics ./frr-metrics/ +COPY frr-tools/metrics ./frr-tools/metrics/ # COPY internals COPY internal internal COPY api api @@ -36,7 +36,7 @@ RUN --mount=type=cache,target=/root/.cache/go-build \ CGO_ENABLED=0 GOOS=$TARGETOS GOARCH=$TARGETARCH GOARM=$VARIANT \ go build -v -o /build/frr-metrics \ -ldflags "-X 'go.universe.tf/metallb/internal/version.gitCommit=${GIT_COMMIT}' -X 
'go.universe.tf/metallb/internal/version.gitBranch=${GIT_BRANCH}'" \ - frr-metrics/exporter.go \ + frr-tools/metrics/exporter.go \ && \ # build speaker CGO_ENABLED=0 GOOS=$TARGETOS GOARCH=$TARGETARCH GOARM=$VARIANT \ @@ -49,7 +49,8 @@ FROM docker.io/alpine:latest COPY --from=builder /build/speaker /speaker COPY --from=builder /build/frr-metrics /frr-metrics -COPY frr-reloader/frr-reloader.sh /frr-reloader.sh +COPY frr-tools/reloader/frr-reloader.sh /frr-reloader.sh +COPY frr-tools/liveness/liveness.sh /liveness.sh COPY LICENSE / LABEL org.opencontainers.image.authors="metallb" \