From 6bde5246fdc313451971c4094d5ec39a54759d7e Mon Sep 17 00:00:00 2001 From: Nicolas Ochem Date: Wed, 2 Nov 2022 16:17:46 -0700 Subject: [PATCH] Service monitor (#501) * add service monitor for octez node * add metrics to mkchain chain by default * change port to 9932, make accessible from anywhere tie servicemonitor with service with a label, allow custom labels for servicemonitor * fix tests * fix black * remove old "metrics" container --- charts/tezos/templates/_containers.tpl | 30 +----- charts/tezos/templates/nodes.yaml | 1 - charts/tezos/templates/servicemonitor.yaml | 17 ++++ charts/tezos/templates/static.yaml | 7 ++ charts/tezos/values.yaml | 14 ++- mkchain/tqchain/mkchain.py | 1 + test/charts/mainnet.expect.yaml | 14 ++- test/charts/mainnet2.expect.yaml | 90 ++++------------- test/charts/private-chain.expect.yaml | 112 ++++++++------------- 9 files changed, 113 insertions(+), 173 deletions(-) create mode 100644 charts/tezos/templates/servicemonitor.yaml diff --git a/charts/tezos/templates/_containers.tpl b/charts/tezos/templates/_containers.tpl index edf865042..8008e5f11 100644 --- a/charts/tezos/templates/_containers.tpl +++ b/charts/tezos/templates/_containers.tpl @@ -158,6 +158,8 @@ name: tezos-rpc - containerPort: 9732 name: tezos-net + - containerPort: 9932 + name: metrics {{- if or (not (hasKey $.node_vals "readiness_probe")) $.node_vals.readiness_probe }} readinessProbe: httpGet: @@ -341,34 +343,6 @@ {{- end }} {{- end }} -{{- define "tezos.container.metrics" }} -{{- if has "metrics" $.node_vals.runs }} -- image: "registry.gitlab.com/nomadic-labs/tezos-metrics" - args: - - "--listen-prometheus=6666" - - "--data-dir=/var/tezos/node/data" - imagePullPolicy: IfNotPresent - name: metrics - ports: - - containerPort: 6666 - name: tezos-metrics - volumeMounts: - - mountPath: /etc/tezos - name: config-volume - - mountPath: /var/tezos - name: var-volume - - mountPath: /etc/secret-volume - name: tezos-accounts - envFrom: - - configMapRef: - name: tezos-config 
- env: -{{- include "tezos.localvars.pod_envvars" . | indent 4 }} - - name: DAEMON - value: tezos-metrics -{{- end }} -{{- end }} - {{/* // * The zerotier containers: */}} diff --git a/charts/tezos/templates/nodes.yaml b/charts/tezos/templates/nodes.yaml index 10d02cac1..3ac17e082 100644 --- a/charts/tezos/templates/nodes.yaml +++ b/charts/tezos/templates/nodes.yaml @@ -36,7 +36,6 @@ spec: {{- include "tezos.container.accusers" $ | indent 8 }} {{- include "tezos.container.bakers" $ | indent 8 }} {{- include "tezos.container.logger" $ | indent 8 }} - {{- include "tezos.container.metrics" $ | indent 8 }} {{- include "tezos.container.zerotier" $ | indent 8 }} {{- include "tezos.container.sidecar" $ | indent 8 }} {{- include "tezos.container.vdf" $ | indent 8 }} diff --git a/charts/tezos/templates/servicemonitor.yaml b/charts/tezos/templates/servicemonitor.yaml new file mode 100644 index 000000000..683326477 --- /dev/null +++ b/charts/tezos/templates/servicemonitor.yaml @@ -0,0 +1,17 @@ +{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: +{{- toYaml .Values.serviceMonitor.labels | nindent 4 }} + name: tezos-service-monitor + namespace: {{ .Release.Namespace }} +spec: + endpoints: + - interval: 15s + port: metrics + path: /metrics + selector: + matchLabels: + appType: tezos-node +{{- end }} diff --git a/charts/tezos/templates/static.yaml b/charts/tezos/templates/static.yaml index 620fade77..e1b8f5b88 100644 --- a/charts/tezos/templates/static.yaml +++ b/charts/tezos/templates/static.yaml @@ -18,7 +18,14 @@ apiVersion: v1 kind: Service metadata: name: {{ $key }} + labels: + appType: tezos-node spec: + ports: + - port: 9732 + name: p2p + - port: 9932 + name: metrics publishNotReadyAddresses: true clusterIP: None selector: diff --git a/charts/tezos/values.yaml b/charts/tezos/values.yaml index ccf03a3a0..bd9d1ea10 100644 --- a/charts/tezos/values.yaml +++ b/charts/tezos/values.yaml @@ -121,7 +121,7 @@ 
should_generate_unsafe_deterministic_data: false # field up above. # - "runs": A list of containers to run. A tezos node implementation is required. # Options being "octez_node" or "tezedge_node". Other optional -# containers are "accuser", "baker", "logger", "vdf" and "metrics". +# containers are "accuser", "baker", "logger" and "vdf". # - "local_storage": use local storage instead of a volume. The storage will be # wiped when the node restarts for any reason. Useful when # faster IO is desired. Defaults to false. @@ -230,6 +230,7 @@ nodes: config: shell: history_mode: rolling + metrics_addr: [ "0.0.0.0:9932" ] # End nodes ## Configuration for K8s Service resources. Configuring the labels selector of a @@ -242,6 +243,17 @@ services: selector: # rpc_node: "true" +# Prometheus Operator is required in your cluster in order to enable +# serviceMonitor below. +# ServiceMonitor allows you to scrape the Prometheus endpoints of your tezos nodes. +# Make sure the endpoints are active by adding: +# metrics_addr: [ "0.0.0.0:9932" ] +# to the "config" section of each node instance. +serviceMonitor: + enabled: false + labels: + # release: my-monitoring-release + # # Signers # # Define remote signers. 
Bakers automatically use signers in their namespace diff --git a/mkchain/tqchain/mkchain.py b/mkchain/tqchain/mkchain.py index 9c1966c24..83d317f1d 100644 --- a/mkchain/tqchain/mkchain.py +++ b/mkchain/tqchain/mkchain.py @@ -151,6 +151,7 @@ def node_config(name, n, is_baker): "is_bootstrap_node": False, "config": { "shell": {"history_mode": "rolling"}, + "metrics_addr": [":9932"], }, } if is_baker: diff --git a/test/charts/mainnet.expect.yaml b/test/charts/mainnet.expect.yaml index 7d34a0b68..282739d31 100644 --- a/test/charts/mainnet.expect.yaml +++ b/test/charts/mainnet.expect.yaml @@ -42,6 +42,9 @@ data: "instances": [ { "config": { + "metrics_addr": [ + "0.0.0.0:9932" + ], "shell": { "history_mode": "rolling" } @@ -82,7 +85,14 @@ apiVersion: v1 kind: Service metadata: name: rolling-node + labels: + appType: tezos-node spec: + ports: + - port: 9732 + name: p2p + - port: 9932 + name: metrics publishNotReadyAddresses: true clusterIP: None selector: @@ -153,10 +163,12 @@ spec: name: tezos-rpc - containerPort: 9732 name: tezos-net + - containerPort: 9932 + name: metrics readinessProbe: httpGet: path: /is_synced - port: 31732 + port: 31732 - name: sidecar image: "tezos-k8s-utils:dev" imagePullPolicy: IfNotPresent diff --git a/test/charts/mainnet2.expect.yaml b/test/charts/mainnet2.expect.yaml index 63730d9bc..a189d1e15 100644 --- a/test/charts/mainnet2.expect.yaml +++ b/test/charts/mainnet2.expect.yaml @@ -124,7 +124,14 @@ apiVersion: v1 kind: Service metadata: name: city-block + labels: + appType: tezos-node spec: + ports: + - port: 9732 + name: p2p + - port: 9932 + name: metrics publishNotReadyAddresses: true clusterIP: None selector: @@ -135,7 +142,14 @@ apiVersion: v1 kind: Service metadata: name: country-town + labels: + appType: tezos-node spec: + ports: + - port: 9732 + name: p2p + - port: 9932 + name: metrics publishNotReadyAddresses: true clusterIP: None selector: @@ -184,6 +198,8 @@ spec: name: tezos-rpc - containerPort: 9732 name: tezos-net + - 
containerPort: 9932 + name: metrics readinessProbe: httpGet: path: /is_synced @@ -217,41 +233,7 @@ spec: - mountPath: /etc/tezos name: config-volume - mountPath: /var/tezos - name: var-volume - - image: "registry.gitlab.com/nomadic-labs/tezos-metrics" - args: - - "--listen-prometheus=6666" - - "--data-dir=/var/tezos/node/data" - imagePullPolicy: IfNotPresent - name: metrics - ports: - - containerPort: 6666 - name: tezos-metrics - volumeMounts: - - mountPath: /etc/tezos - name: config-volume - - mountPath: /var/tezos - name: var-volume - - mountPath: /etc/secret-volume - name: tezos-accounts - envFrom: - - configMapRef: - name: tezos-config - env: - - name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: MY_POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: MY_POD_TYPE - value: node - - name: MY_NODE_CLASS - value: city-block - - name: DAEMON - value: tezos-metrics + name: var-volume - name: sidecar image: "tezos-k8s-utils:dev" imagePullPolicy: IfNotPresent @@ -569,6 +551,8 @@ spec: name: tezos-rpc - containerPort: 9732 name: tezos-net + - containerPort: 9932 + name: metrics readinessProbe: httpGet: path: /is_synced @@ -602,41 +586,7 @@ spec: - mountPath: /etc/tezos name: config-volume - mountPath: /var/tezos - name: var-volume - - image: "registry.gitlab.com/nomadic-labs/tezos-metrics" - args: - - "--listen-prometheus=6666" - - "--data-dir=/var/tezos/node/data" - imagePullPolicy: IfNotPresent - name: metrics - ports: - - containerPort: 6666 - name: tezos-metrics - volumeMounts: - - mountPath: /etc/tezos - name: config-volume - - mountPath: /var/tezos - name: var-volume - - mountPath: /etc/secret-volume - name: tezos-accounts - envFrom: - - configMapRef: - name: tezos-config - env: - - name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: MY_POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: MY_POD_TYPE - value: node - - name: MY_NODE_CLASS - value: country-town - - name: DAEMON - 
value: tezos-metrics + name: var-volume - name: sidecar image: "tezos-k8s-utils:dev" imagePullPolicy: IfNotPresent diff --git a/test/charts/private-chain.expect.yaml b/test/charts/private-chain.expect.yaml index 80109c014..3ec546dee 100644 --- a/test/charts/private-chain.expect.yaml +++ b/test/charts/private-chain.expect.yaml @@ -236,7 +236,14 @@ apiVersion: v1 kind: Service metadata: name: af + labels: + appType: tezos-node spec: + ports: + - port: 9732 + name: p2p + - port: 9932 + name: metrics publishNotReadyAddresses: true clusterIP: None selector: @@ -247,7 +254,14 @@ apiVersion: v1 kind: Service metadata: name: as + labels: + appType: tezos-node spec: + ports: + - port: 9732 + name: p2p + - port: 9932 + name: metrics publishNotReadyAddresses: true clusterIP: None selector: @@ -258,7 +272,14 @@ apiVersion: v1 kind: Service metadata: name: eu + labels: + appType: tezos-node spec: + ports: + - port: 9732 + name: p2p + - port: 9932 + name: metrics publishNotReadyAddresses: true clusterIP: None selector: @@ -269,7 +290,14 @@ apiVersion: v1 kind: Service metadata: name: us + labels: + appType: tezos-node spec: + ports: + - port: 9732 + name: p2p + - port: 9932 + name: metrics publishNotReadyAddresses: true clusterIP: None selector: @@ -317,6 +345,8 @@ spec: name: tezos-rpc - containerPort: 9732 name: tezos-net + - containerPort: 9932 + name: metrics readinessProbe: httpGet: path: /is_synced @@ -411,41 +441,7 @@ spec: - mountPath: /etc/tezos name: config-volume - mountPath: /var/tezos - name: var-volume - - image: "registry.gitlab.com/nomadic-labs/tezos-metrics" - args: - - "--listen-prometheus=6666" - - "--data-dir=/var/tezos/node/data" - imagePullPolicy: IfNotPresent - name: metrics - ports: - - containerPort: 6666 - name: tezos-metrics - volumeMounts: - - mountPath: /etc/tezos - name: config-volume - - mountPath: /var/tezos - name: var-volume - - mountPath: /etc/secret-volume - name: tezos-accounts - envFrom: - - configMapRef: - name: tezos-config - env: - - 
name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: MY_POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: MY_POD_TYPE - value: node - - name: MY_NODE_CLASS - value: af - - name: DAEMON - value: tezos-metrics + name: var-volume - name: sidecar image: "tezos-k8s-utils:dev" imagePullPolicy: IfNotPresent @@ -637,10 +633,12 @@ spec: name: tezos-rpc - containerPort: 9732 name: tezos-net + - containerPort: 9932 + name: metrics readinessProbe: httpGet: path: /is_synced - port: 31732 + port: 31732 - name: sidecar image: "tezos-k8s-utils:dev" imagePullPolicy: IfNotPresent @@ -834,6 +832,8 @@ spec: name: tezos-rpc - containerPort: 9732 name: tezos-net + - containerPort: 9932 + name: metrics readinessProbe: httpGet: path: /is_synced @@ -928,41 +928,7 @@ spec: - mountPath: /etc/tezos name: config-volume - mountPath: /var/tezos - name: var-volume - - image: "registry.gitlab.com/nomadic-labs/tezos-metrics" - args: - - "--listen-prometheus=6666" - - "--data-dir=/var/tezos/node/data" - imagePullPolicy: IfNotPresent - name: metrics - ports: - - containerPort: 6666 - name: tezos-metrics - volumeMounts: - - mountPath: /etc/tezos - name: config-volume - - mountPath: /var/tezos - name: var-volume - - mountPath: /etc/secret-volume - name: tezos-accounts - envFrom: - - configMapRef: - name: tezos-config - env: - - name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: MY_POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: MY_POD_TYPE - value: node - - name: MY_NODE_CLASS - value: eu - - name: DAEMON - value: tezos-metrics + name: var-volume - name: sidecar image: "tezos-k8s-utils:dev" imagePullPolicy: IfNotPresent @@ -1155,6 +1121,8 @@ spec: name: tezos-rpc - containerPort: 9732 name: tezos-net + - containerPort: 9932 + name: metrics readinessProbe: httpGet: path: /is_synced @@ -1221,7 +1189,7 @@ spec: - mountPath: /var/tezos name: var-volume - mountPath: /etc/tezos/per-block-votes - name: 
per-block-votes + name: per-block-votes - name: sidecar image: "tezos-k8s-utils:dev" imagePullPolicy: IfNotPresent