diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..e491144 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,26 @@ +name: Go +on: + push: + branches: ["master"] + pull_request: + branches: ["master"] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v5 + + - name: Setup Go + uses: actions/setup-go@v6 + with: + go-version: '1.25.x' + + - name: Install golangci-lint + run: | + curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/HEAD/install.sh \ + | sh -s -- -b $(go env GOPATH)/bin v2.5.0 + + - name: Test + run: make all diff --git a/.gitignore b/.gitignore index aaadf73..25f3d24 100644 --- a/.gitignore +++ b/.gitignore @@ -28,5 +28,8 @@ go.work.sum .env # Editor/IDE -# .idea/ -# .vscode/ +.idea/ +.vscode/ + +# Generated dashboards +generated \ No newline at end of file diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..184edbb --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,43 @@ +version: "2" + +run: + timeout: 5m + allow-parallel-runners: true + relative-path-mode: cfg + +linters: + default: none + enable: + - dupl + - errcheck + - copyloopvar + - ginkgolinter + - goconst + - gocyclo + - govet + - ineffassign + - misspell + - nakedret + - unconvert + - unparam + - unused + - staticcheck + disable: + - prealloc + - revive + + settings: + revive: + rules: + - name: comment-spacings + +formatters: + enable: + - gofmt + - goimports + # example settings for formatters; remove if unused + settings: + gofmt: + simplify: true + +issues: {} \ No newline at end of file diff --git a/.tool-versions b/.tool-versions new file mode 100644 index 0000000..db88104 --- /dev/null +++ b/.tool-versions @@ -0,0 +1,2 @@ +golang 1.25.1 +golangci-lint 2.5.0 \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a6f639a --- /dev/null +++ b/Makefile @@ -0,0 +1,63 @@ +##@ General + +# The help target prints out all 
targets with their descriptions organized +# beneath their categories. The categories are represented by '##@' and the +# target descriptions by '##'. The awk command is responsible for reading the +# entire set of makefiles included in this invocation, looking for lines of the +# file as xyz: ## something, and then pretty-format the target and help. Then, +# if there's a line with ##@ something, that gets pretty-printed as a category. +# More info on the usage of ANSI control characters for terminal formatting: +# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters +# More info on the awk command: +# http://linuxcommand.org/lc3_adv_awk.php + +.PHONY: help +help: ## Display this help. + @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) + +.PHONY: all +all: fmt vet lint test + +##@ Development +.PHONY: fmt +fmt: ## Run go fmt against code. + go fmt ./... + +.PHONY: vet +vet: ## Run go vet against code. + go vet ./... + +.PHONY: lint +lint: ## Run linter + golangci-lint run + +.PHONY: lint-fix +lint-fix: ## Run linter and fix issues + golangci-lint run --fix + +.PHONY: test +test: ## Run project tests + go test ./... + +.PHONY: benchmark +benchmark: ## Run project benchmarks + go test -bench=. -benchtime=10000x -benchmem ./... 
+ +METRIC_NAMESPACE ?= unset + +##@ Generate +.PHONY: dashboards +dashboards: ## Generate dashboards from templates + @# Fail if METRIC_NAMESPACE is unset + @[ "$(METRIC_NAMESPACE)" != "unset" ] && [ -n "$(METRIC_NAMESPACE)" ] || { \ + echo "Error: METRIC_NAMESPACE is required."; \ + echo "Usage: make dashboards METRIC_NAMESPACE=my_operator"; \ + exit 1; \ + } + @echo "Generating dashboards for $(METRIC_NAMESPACE)…" + @mkdir -p generated/dashboards + @find dashboards -type f -name '*.tpl.json' | while IFS= read -r file; do \ + base_name=`basename "$$file" .tpl.json`; \ + new_file="generated/dashboards/$$base_name.json"; \ + sed "s/{{operator_namespace}}/$(METRIC_NAMESPACE)_/g" "$$file" > "$$new_file"; \ + done diff --git a/README.md b/README.md new file mode 100644 index 0000000..ddd2e83 --- /dev/null +++ b/README.md @@ -0,0 +1,279 @@ +# CRD Condition Metrics + +A simple, easy-to-integrate metric recording utility for Kubernetes operators, giving you metrics +that are representative of—and kept in line with—your [CRD status Conditions](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties). + +This package is built on the [Prometheus GaugeVecSet implementation for Go](https://github.com/sourcehawk/go-prometheus-gaugevecset). + +## 📚 Table of Contents + +1. [Features](#features) +2. [Installation](#installation) +3. [Motivation](#motivation) +4. [Setup: Operator Initialization](#operator-initialization) +5. [Setup: Controller Usage](#controller-usage) +6. [PromQL Usage Examples](#promql-usage-examples) + +--- + +## Features +- **Ensures consistency between your CRD statuses and your metrics**: The metrics are based on your status conditions and + synced when you update the conditions. +- **Easy integration**: Get metrics anywhere with little initial setup and simple method calls. +- **Lightweight and performant**: Small memory footprint at large scale, fast ops. 
+- **Keeps cardinality under control**: Only 1 metric series per (custom resource, condition type) combination. + Gives you low cardinality even with thousands of unique label combinations. +- [Dashboards available](/dashboards) to get you started! + +--- + +## Installation + +Install the Go package: + +```bash +go get github.com/sourcehawk/go-crd-condition-metrics +``` + +Import it: + +```go +import ( + ccm "github.com/sourcehawk/go-crd-condition-metrics/pkg/crd-condition-metrics" +) +``` + +--- + +## Motivation + +Creating meaningful metrics for custom resources is an essential part of building observability into any Kubernetes +operator or controller. But despite its importance, there’s a lack of standardization—especially when it comes to +exposing metrics that accurately reflect the actual `status` of a CRD. + +In Kubernetes, the `status.conditions` field has become the de facto convention for representing the state of a +resource. It captures key lifecycle signals such as `Ready`, `Reconciled`, `Degraded`, or `FailedToProvision`, along +with rich metadata like `reason`, `status`, and `lastTransitionTime`. + +This package was created to **standardize the way we expose those conditions as metrics**, allowing you to: +- Derive metrics directly from your resource’s `status.conditions` +- Keep metric values and labels fully in sync with the real resource state +- Avoid excessive metric cardinality +- Gain visibility into when a condition last transitioned + +### Pattern inspiration: kube-state-metrics + +This metric strategy is inspired by `kube_pod_status_phase` from [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics), +which exports one time series per `phase` for each `(namespace, pod)` pair and marks exactly one as active (`1`) while +the others are set to inactive (`0`). 
+ +Example: + +``` +kube_pod_status_phase{namespace="default", pod="nginx", phase="Running"} 1 +kube_pod_status_phase{namespace="default", pod="nginx", phase="Pending"} 0 +kube_pod_status_phase{namespace="default", pod="nginx", phase="Failed"} 0 +``` + +We adopt a similar idea for `status.conditions`, but with some key differences: + +- We expose **only one time series per (custom resource, condition type)**. All other condition variants + (status/reason combinations) are removed when a new one is set. +- Instead of using binary values (`1` or `0`), we set the **Unix timestamp of `lastTransitionTime`** as the metric + value. This allows you to query when a condition last transitioned. + +Example metric from this package: + +``` +my_operator_controller_condition{ + controller="my_controller", + kind="MyCR", + name="my-cr", + namespace="default", + condition="Ready", + status="False", + reason="FailedToProvision" +} 1759174321 +``` + +This makes it easy to build dashboards and alerts like: +- Show all CRs currently in a non-`Ready` state +- Alert if a CR has been stuck in a given condition for too long +- Visualize how long a CR has remained in its current status + +### Why this matters + +When operating controllers at scale, consistency and cardinality matter. Metrics should reflect the actual resource +state—not drift from it—and they should not grow uncontrollably as conditions change. + +This package gives you a lightweight, plug-and-play way to track CRD condition metrics correctly, consistently, and +with full context. + +--- + +## Operator Initialization + +The metric should be initialized and registered once. + +You can embed the `ConditionMetricRecorder` in your controller's recorder. 
+ +```go +package my_metrics + +import ( + controllermetrics "sigs.k8s.io/controller-runtime/pkg/metrics" + ccm "github.com/sourcehawk/go-crd-condition-metrics/pkg/crd-condition-metrics" +) + +// We need this variable later to create the ConditionMetricRecorder +var OperatorConditionsGauge *ccm.OperatorConditionsGauge + +// Initialize the operator condition gauge once +func init() { + OperatorConditionsGauge = ccm.NewOperatorConditionsGauge("my_operator") + controllermetrics.Registry.MustRegister(OperatorConditionsGauge) +} + +// Embed in existing metrics recorder +type MyControllerRecorder struct { + ccm.ConditionMetricRecorder +} +``` + +When constructing your reconciler, initialize the condition metric recorder with the +operator conditions gauge and a unique name for each controller. + +_cmd/main.go_ +```go +package main + +import ( + mymetrics "path/to/pkg/my_metrics" + ccm "github.com/sourcehawk/go-crd-condition-metrics/pkg/crd-condition-metrics" +) + +func main() { + // ... + recorder := mymetrics.MyControllerRecorder{ + ConditionMetricRecorder: ccm.ConditionMetricRecorder{ + Controller: "my-controller", // unique name per reconciler + OperatorConditionsGauge: mymetrics.OperatorConditionsGauge, + }, + } + + reconciler := &MyReconciler{ + Recorder: recorder, + } + // ... +} +``` + +--- + +## Controller Usage + +The easiest drop-in way to start using the metrics recorder is by creating a `SetStatusCondition` wrapper, which +you call in place of `meta.SetStatusCondition`. We call `RecordConditionFor` to record our metrics. + +To delete the metrics for a given custom resource, simply call `RemoveConditionsFor` and pass the object. 
+ +```go +const ( + kind = "MyCR" +) + +// SetStatusCondition utility function which replaces and wraps meta.SetStatusCondition calls +func (r *MyReconciler) SetStatusCondition(cr *v1.MyCR, cond metav1.Condition) bool { + changed := meta.SetStatusCondition(&cr.Status.Conditions, cond) + // refetch the condition to get the updated version + updated := meta.FindStatusCondition(cr.Status.Conditions, cond.Type) + if updated != nil { + r.Recorder.RecordConditionFor( + kind, cr, updated.Type, string(updated.Status), updated.Reason, updated.LastTransitionTime, + ) + } + return changed +} + +func (r *MyReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + // Get the resource we're reconciling + cr := new(v1.MyCR) + if err := r.Get(ctx, req.NamespacedName, cr); err != nil { + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + // Remove the metrics when the CR is deleted + if cr.DeletionTimestamp != nil { + r.Recorder.RemoveConditionsFor(kind, cr) + } + + // ... + + // Update the status conditions using our wrapper function + if r.SetStatusCondition(cr, condition) { + if err := r.Status().Update(ctx, cr); err != nil { + return ctrl.Result{}, err + } + } + + return ctrl.Result{}, nil +} +``` + +--- + +## PromQL Usage Examples + +Here are some examples of how we can query the metrics. + +The examples assume the `OperatorConditionsGauge` was +initialized with the namespace `my_operator`, which results in the metric name being `my_operator_controller_condition`. + +In code: +```go +OperatorConditionsGauge = ccm.NewOperatorConditionsGauge("my_operator") +``` + +> [!IMPORTANT] +> Most of the time, the `namespace` label is reserved by the pod scraping the metrics. +> The `namespace` label we set is therefore in most cases labeled as `exported_namespace`. +> **The examples do not assume this to be the case.** + +Get all CRs of kind `App` that have the condition `Ready` set to `False`. 
+ +```promql +my_operator_controller_condition{ + kind="App", + condition="Ready", + status="False", +} +``` + +Output: + +``` +my_operator_controller_condition{condition="Ready", controller="myctrlr", namespace="ns-1", id="ns-1/my-app-1", kind="App", name="my-app-1", reason="Foo", status="False"} 1759416292 +my_operator_controller_condition{condition="Ready", controller="myctrlr", namespace="ns-1", id="ns-1/my-app-2", kind="App", name="my-app-2", reason="Bar", status="False"} 1759329097 +my_operator_controller_condition{condition="Ready", controller="myctrlr", namespace="ns-2", id="ns-2/my-app", kind="App", name="my-app", reason="Foo", status="False"} 1759329145 +my_operator_controller_condition{condition="Ready", controller="myctrlr", namespace="ns-3", id="ns-3/my-app", kind="App", name="my-app", reason="Foo", status="False"} 1759406280 +``` + +--- + +Count the number of CRs of kind `App` whose `Ready` condition has status `False`. + +```promql +count( + my_operator_controller_condition{ + kind="App", + condition="Ready", + status="False", + } > 0 +) +``` + +Output: + +``` +4 +``` \ No newline at end of file diff --git a/dashboards/README.md b/dashboards/README.md new file mode 100644 index 0000000..2b66ed0 --- /dev/null +++ b/dashboards/README.md @@ -0,0 +1,23 @@ +# Grafana dashboards + +The Grafana dashboards are templated. To get a dashboard that fits your metric name, run the following command, using +the name of your metric namespace as input. + +```bash +make dashboards METRIC_NAMESPACE=your_operator +``` + +The generated files are placed under `generated/dashboards`. + +The name of your metric namespace is determined by the initialization of `OperatorConditionsGauge`. 
For instance, here +the namespace is `my_operator`: + +```go +OperatorConditionsGauge = ccm.NewOperatorConditionsGauge("my_operator") +``` + +## CRD Condition Browser Dashboard + +![crd_conditions_browser_dashboard_1.png](/docs/crd_conditions_browser_dashboard_1.png) + +![crd_conditions_browser_dashboard_2.png](/docs/crd_conditions_browser_dashboard_2.png) \ No newline at end of file diff --git a/dashboards/crd_conditions_browser.tpl.json b/dashboards/crd_conditions_browser.tpl.json new file mode 100644 index 0000000..d0f1cfe --- /dev/null +++ b/dashboards/crd_conditions_browser.tpl.json @@ -0,0 +1,740 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Dashboard to monitor operator custom resource conditions", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 177601, + "links": [], + "liveNow": true, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 10, + "panels": [], + "title": "Operator Conditions", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Conditions matching the given filter criteria ", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "semi-dark-blue", + "mode": "fixed" + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 5, + "x": 0, + "y": 1 + }, + "id": 6, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "same_as_value", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": 
"", + "values": false + }, + "showPercentChange": false, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "12.1.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "count(\n {{operator_namespace}}controller_condition{kind=\"$kind\", condition=~\"$condition\", status=~\"$status\", reason=~\"$reason\"} > 0\n)", + "legendFormat": "Total", + "range": true, + "refId": "A" + } + ], + "title": "Total Conditions Matching", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Table of $kind conditions of type $condition", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "transparent", + "mode": "fixed" + }, + "custom": { + "align": "auto", + "cellOptions": { + "applyToRow": false, + "mode": "basic", + "type": "color-background", + "wrapText": true + }, + "filterable": true, + "inspect": true + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "text", + "value": 0 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Namespace" + }, + "properties": [ + { + "id": "custom.width", + "value": 397 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Since" + }, + "properties": [ + { + "id": "unit", + "value": "dateTimeFromNow" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Status" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "options": { + "False": { + "color": "red", + "index": 1, + "text": "False" + }, + "True": { + "color": "semi-dark-green", + "index": 0, + "text": "True" + }, + "Unknown": { + "color": "semi-dark-yellow", + "index": 2, + "text": "Unknown" + } + }, + "type": "value" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 5, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + 
"sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "12.1.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(\n {{operator_namespace}}controller_condition{\n kind=\"$kind\",\n condition=~\"$condition\",\n status=~\"$status\",\n reason=~\"$reason\",\n exported_namespace=~\"$namespace\"\n }\n) by (name, exported_namespace, condition, status, reason)", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "$kind Conditions", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "value_ms", + "binary": { + "left": { + "matcher": { + "id": "byName", + "options": "Value" + } + }, + "operator": "*", + "right": { + "fixed": "1000" + } + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "time", + "targetField": "value_ms" + } + ], + "fields": {} + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true + }, + "includeByName": {}, + "indexByName": { + "Time": 0, + "Value": 7, + "condition": 3, + "exported_namespace": 2, + "name": 1, + "reason": 5, + "status": 4, + "value_ms": 6 + }, + "renameByName": { + "Value": "", + "condition": "Condition", + "exported_namespace": "Namespace", + "name": "Name", + "reason": "Reason", + "status": "Status", + "value_ms": "Since" + } + } + } + ], + "type": "table" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 9, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Number of $kind $condition Conditions in a False state", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "semi-dark-red", + "mode": "thresholds" + }, + "mappings": [], + 
"noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "semi-dark-red", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 7, + "options": { + "colorMode": "background_solid", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "same_as_value", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "12.1.1", + "repeat": "condition", + "repeatDirection": "h", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "count(\n {{operator_namespace}}controller_condition{kind=\"$kind\", condition=~\"$condition\", status=\"False\", reason=~\"$reason\"} > 0\n)", + "legendFormat": "Total", + "range": true, + "refId": "A" + } + ], + "title": "$condition=False", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Number of $kind $condition Conditions in an Unknown state", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "semi-dark-red", + "mode": "thresholds" + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "semi-dark-yellow", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 11, + "options": { + "colorMode": "background_solid", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "same_as_value", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "12.1.1", + 
"repeat": "condition", + "repeatDirection": "h", + "targets": [ + { + "editorMode": "code", + "expr": "count(\n {{operator_namespace}}controller_condition{kind=\"$kind\", condition=~\"$condition\", status=\"Unknown\", reason=~\"$reason\"} > 0\n)", + "legendFormat": "Total", + "range": true, + "refId": "A" + } + ], + "title": "$condition=Unknown", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Number of $kind $condition Conditions in a True state", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "semi-dark-red", + "mode": "thresholds" + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 8, + "options": { + "colorMode": "background_solid", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "same_as_value", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "12.1.1", + "repeat": "condition", + "repeatDirection": "h", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "count(\n {{operator_namespace}}controller_condition{kind=\"$kind\", condition=~\"$condition\", status=\"True\", reason=~\"$reason\"} > 0\n)", + "legendFormat": "Total", + "range": true, + "refId": "A" + } + ], + "title": "$condition=True", + "type": "stat" + } + ], + "title": "Status Counts Per Condition", + "type": "row" + } + ], + "preload": false, + "refresh": "1m", + "schemaVersion": 41, + "tags": [ + "kubernetes", + "kubernetes-operator" + ], + "templating": { + "list": [ + { + "current": { + "text": "", + "value": "" + }, + "description": "The data source to get the metrics 
from", + "label": "Datasource", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "current": { + "text": "", + "value": "" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values({{operator_namespace}}controller_condition,kind)", + "description": "The resource kind to view", + "label": "Kind", + "name": "kind", + "options": [], + "query": { + "qryType": 1, + "query": "label_values({{operator_namespace}}controller_condition,kind)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values({{operator_namespace}}controller_condition{kind=\"$kind\"},condition)", + "description": "The condition type to display", + "includeAll": true, + "label": "Condition", + "multi": true, + "name": "condition", + "options": [], + "query": { + "qryType": 1, + "query": "label_values({{operator_namespace}}controller_condition{kind=\"$kind\"},condition)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "type": "query" + }, + { + "allowCustomValue": true, + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values({{operator_namespace}}controller_condition{kind=\"$kind\", condition=~\"$condition\"},status)", + "description": "Filter for a specific status", + "includeAll": true, + "label": "Status", + "multi": true, + "name": "status", + "options": [], + "query": { + "qryType": 1, + "query": "label_values({{operator_namespace}}controller_condition{kind=\"$kind\", condition=~\"$condition\"},status)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + 
"regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values({{operator_namespace}}controller_condition{kind=\"$kind\", condition=~\"$condition\", status=~\"$status\"},reason)", + "description": "Filter for condition reason", + "includeAll": true, + "label": "Reason", + "multi": true, + "name": "reason", + "options": [], + "query": { + "qryType": 1, + "query": "label_values({{operator_namespace}}controller_condition{kind=\"$kind\", condition=~\"$condition\", status=~\"$status\"},reason)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "query_result(sum ({{operator_namespace}}controller_condition{kind=\"$kind\", condition=~\"$condition\"} > 0) by (exported_namespace))", + "description": "The namespaces in which to list the resources", + "includeAll": true, + "label": "Namespace", + "multi": true, + "name": "namespace", + "options": [], + "query": { + "qryType": 3, + "query": "query_result(sum ({{operator_namespace}}controller_condition{kind=\"$kind\", condition=~\"$condition\"} > 0) by (exported_namespace))", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "^.*exported_namespace=\"([^\"]+)\".*$", + "sort": 1, + "type": "query" + }, + { + "allowCustomValue": false, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values({{operator_namespace}}controller_condition{kind=\"$kind\", condition=~\"$condition\", status=~\"$status\"},id)", + "description": "Namespace and resource name combination", + "hide": 2, + "includeAll": true, + "multi": true, + "name": 
"resource_id", + "options": [], + "query": { + "qryType": 1, + "query": "label_values({{operator_namespace}}controller_condition{kind=\"$kind\", condition=~\"$condition\", status=~\"$status\"},id)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "baseFilters": [], + "datasource": { + "type": "prometheus", + "uid": "" + }, + "filters": [], + "name": "Filters", + "type": "adhoc" + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "hidden": true + }, + "timezone": "browser", + "title": "CRD Conditions Browser", + "uid": "crd_conditions_browser", + "version": 1 +} \ No newline at end of file diff --git a/docs/crd_conditions_browser_dashboard_1.png b/docs/crd_conditions_browser_dashboard_1.png new file mode 100644 index 0000000..050c43a Binary files /dev/null and b/docs/crd_conditions_browser_dashboard_1.png differ diff --git a/docs/crd_conditions_browser_dashboard_2.png b/docs/crd_conditions_browser_dashboard_2.png new file mode 100644 index 0000000..50797f0 Binary files /dev/null and b/docs/crd_conditions_browser_dashboard_2.png differ diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..b26478f --- /dev/null +++ b/go.mod @@ -0,0 +1,25 @@ +module github.com/sourcehawk/go-crd-condition-metrics + +go 1.25 + +require ( + github.com/prometheus/client_golang v1.23.2 + github.com/prometheus/common v0.66.1 + github.com/sourcehawk/go-prometheus-gaugevecset v1.1.0 + github.com/stretchr/testify v1.11.1 +) + +require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/kylelemons/godebug v1.1.0 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/procfs v0.16.1 // indirect + go.yaml.in/yaml/v2 
v2.4.2 // indirect + golang.org/x/sys v0.35.0 // indirect + google.golang.org/protobuf v1.36.8 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..f5b41c5 --- /dev/null +++ b/go.sum @@ -0,0 +1,45 @@ +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang 
v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= +github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= +github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= +github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/sourcehawk/go-prometheus-gaugevecset v1.1.0 h1:M0R2IZrKYT9dvJ+2bnPb69UVfFzHmS1pkBNbRTbXkM4= +github.com/sourcehawk/go-prometheus-gaugevecset v1.1.0/go.mod h1:lNJLSekPoA5pzxuyNMNf5XrE5Kvbaqe3kZ4Fe06R1rg= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= +go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= +golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= +golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c 
h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/pkg/crd-condition-metrics/metrics.go b/pkg/crd-condition-metrics/metrics.go
new file mode 100644
index 0000000..20fbcbf
--- /dev/null
+++ b/pkg/crd-condition-metrics/metrics.go
@@ -0,0 +1,116 @@
+// Package crd_condition_metrics records the conditions of custom resources
+// as a Prometheus gauge. Each recorded series carries the unix timestamp of
+// the condition's last transition as its value.
+package crd_condition_metrics
+
+import (
+	"fmt"
+	"time"
+
+	metrics "github.com/sourcehawk/go-prometheus-gaugevecset/pkg/gauge-vec-set"
+)
+
+// Subsystem, name and help text handed to metrics.NewGaugeVecSet by
+// NewOperatorConditionsGauge.
+const (
+	operatorConditionMetricSubsystem = "controller"
+	operatorConditionMetricName      = "condition"
+	operatorConditionMetricHelp      = "Condition status for a custom resource; one time series per (custom resource, condition type) combination."
+)
+
+// Label names of the gauge. The order matters: RecordConditionFor supplies the
+// label values positionally in exactly this order.
+var (
+	// indexLabels identify the resource a series belongs to.
+	indexLabels = []string{"controller", "kind", "name", "namespace"}
+	// groupLabels identify the condition type within a resource.
+	groupLabels = []string{"condition"}
+	// extraLabels describe the current state of that condition.
+	extraLabels = []string{"status", "reason", "id"}
+)
+
+// OperatorConditionsGauge is the condition gauge of an operator. It embeds
+// *metrics.GaugeVecSet, so the embedded type's methods are available on it
+// directly.
+type OperatorConditionsGauge struct {
+	*metrics.GaugeVecSet
+}
+
+// NewOperatorConditionsGauge creates a new OperatorConditionsGauge for an operator.
+// Initialize once (e.g., in your package init or setup) and use in all implementations of ConditionMetricRecorder.
+//
+// A typical setup looks like this:
+//
+//	var OperatorConditionsGauge *OperatorConditionsGauge
+//
+//	func init() {
+//		OperatorConditionsGauge = NewOperatorConditionsGauge("my-operator")
+//		controllermetrics.Registry.MustRegister(OperatorConditionsGauge)
+//	}
+//
+// metricNamespace is passed through as the metric namespace; subsystem, name,
+// help text and label names are fixed by this package.
+func NewOperatorConditionsGauge(metricNamespace string) *OperatorConditionsGauge {
+	set := metrics.NewGaugeVecSet(
+		metricNamespace,
+		operatorConditionMetricSubsystem,
+		operatorConditionMetricName,
+		operatorConditionMetricHelp,
+		indexLabels,
+		groupLabels,
+		extraLabels...,
+	)
+
+	return &OperatorConditionsGauge{GaugeVecSet: set}
+}
+
+// ObjectLike is the minimal view of a resource the recorder needs: its name
+// and its namespace.
+type ObjectLike interface {
+	GetName() string
+	GetNamespace() string
+}
+
+// ConditionMetricRecorder publishes Kubernetes style `metav1.Condition` values
+// of custom resources through an OperatorConditionsGauge.
+//
+// It is meant to be embedded in a controller specific recorder:
+//
+//	type MyControllerRecorder struct {
+//		gvs.ConditionMetricRecorder
+//	}
+//
+//	r := MyControllerRecorder{
+//		ConditionMetricRecorder: gvs.ConditionMetricRecorder{
+//			Controller:              "my-controller",
+//			OperatorConditionsGauge: my_metrics.OperatorConditionsGauge,
+//		},
+//	}
+type ConditionMetricRecorder struct {
+	// Controller is the value written to the "controller" label of every series.
+	Controller string
+	// OperatorConditionsGauge is the gauge created by NewOperatorConditionsGauge.
+	// It must be non-nil before any method of the recorder is called.
+	OperatorConditionsGauge *OperatorConditionsGauge
+}
+
+// RecordConditionFor sets a condition metric for a given controller and object.
+//
+// It enforces exclusivity within the same (custom resource, condition type), ensuring that only the latest
+// +// If the last transition time is zero, the value of the metric is set to the unix timestamp for time.Now().UTC() +// +// Example: +// +// c := metav1.Condition{ +// Type: "Ready", +// Status: metav1.ConditionTrue, +// Reason: "AppReady", +// LastTransitionTime: metav1.Now(), +// } +// +// r.RecordConditionFor(kind, obj, c.Type, string(c.Status), c.Reason, c.LastTransitionTime.Time) +func (r *ConditionMetricRecorder) RecordConditionFor( + kind string, object ObjectLike, + conditionType, conditionStatus, conditionReason string, lastTransitionTime time.Time, +) { + id := fmt.Sprintf("%s/%s", object.GetNamespace(), object.GetName()) + indexValues := []string{r.Controller, kind, object.GetName(), object.GetNamespace()} + groupValues := []string{conditionType} + extraValues := []string{conditionStatus, conditionReason, id} + + if lastTransitionTime.IsZero() { + lastTransitionTime = time.Now().UTC() + } + + r.OperatorConditionsGauge.SetGroup(float64(lastTransitionTime.Unix()), indexValues, groupValues, extraValues...) +} + +// RemoveConditionsFor deletes all condition metrics for a given resource. +// This removes all condition types (e.g., Ready, Reconciled) for the resource in one call. +// +// Typically called when the object is deleted or no longer relevant to the controller (Deletion reconcile). +// Returns the number of time series deleted. 
+func (r *ConditionMetricRecorder) RemoveConditionsFor(kind string, object ObjectLike) (removed int) {
+	return r.OperatorConditionsGauge.DeleteByIndex(r.Controller, kind, object.GetName(), object.GetNamespace())
+}
diff --git a/pkg/crd-condition-metrics/metrics_benchmark_test.go b/pkg/crd-condition-metrics/metrics_benchmark_test.go
new file mode 100644
index 0000000..857a5c9
--- /dev/null
+++ b/pkg/crd-condition-metrics/metrics_benchmark_test.go
@@ -0,0 +1,176 @@
+package crd_condition_metrics
+
+import (
+	"bytes"
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/common/expfmt"
+)
+
+/*
+Run:
+	go test -run '^$' -bench . -benchtime=10000x -benchmem ./pkg/crd-condition-metrics
+*/
+
+// Let's benchmark against a somewhat realistic high usage scenario
+const (
+	controllerCount         = 10
+	resourcesPerController  = 200
+	conditionsPerController = 3
+	variantsPerCondition    = 10
+	// Maximum total time series variants: 10 * 200 * 3 * 10 = 60k
+	// In our configuration however, we expect only one variant per condition to be exported.
+	// Maximum total exported time series: 10 * 200 * 3 * 1 = 6k
+	maxCardinality = controllerCount * resourcesPerController * conditionsPerController * variantsPerCondition
+)
+
+// generatedName returns prefix immediately followed by the decimal form of i,
+// e.g. generatedName("Resource", 7) == "Resource7".
+func generatedName(prefix string, i int) string {
+	return fmt.Sprintf("%s%d", prefix, i)
+}
+
+// FakeObject is a minimal ObjectLike implementation for tests and benchmarks.
+type FakeObject struct {
+	Name      string
+	Namespace string
+}
+
+// GetName returns the object's name.
+func (f *FakeObject) GetName() string {
+	return f.Name
+}
+
+// GetNamespace returns the object's namespace.
+func (f *FakeObject) GetNamespace() string {
+	return f.Namespace
+}
+
+// FakeCondition carries the condition fields that RecordConditionFor consumes.
+type FakeCondition struct {
+	Type   string
+	Status string
+	Reason string
+}
+
+// createBenchmarkScenario registers a fresh gauge on registry and fills it with
+// controllerCount * resourcesPerController * conditionsPerController groups,
+// recording variantsPerCondition variants into each group one after another.
+//
+// The gauge is unregistered again through tb.Cleanup. The returned recorder is
+// bound to that gauge and to the controller name "my-controller".
+func createBenchmarkScenario(tb testing.TB, registry *prometheus.Registry) *ConditionMetricRecorder {
+	tb.Helper()
+
+	// The metric namespace is suffixed with b.N when called from a benchmark.
+	// Guard the assertion so the helper does not panic when handed a *testing.T.
+	iterations := 0
+	if b, ok := tb.(*testing.B); ok {
+		iterations = b.N
+	}
+	ns := "bench_ns_" + generatedName("", iterations)
+
+	gauge := NewOperatorConditionsGauge(ns)
+	// A gauge that failed to register would make every later gather silently
+	// empty, so fail loudly instead of dropping the error.
+	if err := registry.Register(gauge); err != nil {
+		tb.Fatalf("register gauge %q: %v", ns, err)
+	}
+	tb.Cleanup(func() {
+		registry.Unregister(gauge)
+	})
+
+	rec := &ConditionMetricRecorder{
+		Controller:              "my-controller",
+		OperatorConditionsGauge: gauge,
+	}
+
+	obj := &FakeObject{}
+	transitionTime := time.Now().UTC()
+
+	condition := &FakeCondition{
+		Status: "True", // doesn't matter, cardinality decided by Reason
+	}
+
+	for i := 0; i < controllerCount; i++ {
+		kind := generatedName("Controller", i)
+
+		for j := 0; j < resourcesPerController; j++ {
+			obj.Name = generatedName("Resource", j)
+			obj.Namespace = generatedName("namespace", j)
+
+			for k := 0; k < conditionsPerController; k++ {
+				condition.Type = generatedName("condition", k)
+
+				for v := 0; v < variantsPerCondition; v++ {
+					condition.Reason = generatedName("variant", v)
+					// Pass the condition's Status as the status label; the
+					// Reason alone distinguishes the variants.
+					rec.RecordConditionFor(kind, obj, condition.Type, condition.Status, condition.Reason, transitionTime)
+				}
+			}
+		}
+	}
+
+	return rec
+}
+
+// Benchmark the average time per call on a pre-populated scenario:
+// - RecordConditionFor
+// - RemoveConditionsFor
+//
+// Reports: ns/op for each sub-benchmark.
+func Benchmark_ConditionMetricsRecorder_TimePerCall(b *testing.B) {
+	rec := createBenchmarkScenario(b, prometheus.NewRegistry())
+
+	// The object's name and namespace match entries of the populated dataset.
+	// The kind does not (populated kinds are "Controller<i>"), so the series
+	// written here are separate from the pre-populated ones.
+	const kind = "Benchmark"
+	obj := &FakeObject{Name: "Resource0", Namespace: "namespace0"}
+	transitionTime := time.Now().UTC()
+
+	// Two variants in the same (controller,kind,name,namespace,condition) group:
+	// index 0 is Status "True", index 1 is Status "False".
+	variants := [2]FakeCondition{
+		{Type: "condition0", Status: "True", Reason: "variant0"},
+		{Type: "condition0", Status: "False", Reason: "variant0"},
+	}
+
+	b.Run("RecordConditionFor", func(b *testing.B) {
+		b.ReportAllocs()
+		b.ResetTimer()
+		b.ReportMetric(float64(maxCardinality), "series/op")
+		for i := 0; i < b.N; i++ {
+			// Even iterations record the "True" variant, odd ones "False".
+			c := &variants[i&1]
+			rec.RecordConditionFor(kind, obj, c.Type, c.Status, c.Reason, transitionTime)
+		}
+	})
+
+	b.Run("RemoveConditionsFor", func(b *testing.B) {
+		b.ReportAllocs()
+		b.ResetTimer()
+		b.ReportMetric(float64(maxCardinality), "series/op")
+
+		seed := &variants[0]
+		for i := 0; i < b.N; i++ {
+			// Re-create the series outside the timed region so that every
+			// timed call has something to remove.
+			b.StopTimer()
+			rec.RecordConditionFor(kind, obj, seed.Type, seed.Status, seed.Reason, transitionTime)
+			b.StartTimer()
+
+			rec.RemoveConditionsFor(kind, obj)
+		}
+	})
+}
+
+// Benchmark the size of the Prometheus gather output on a pre-populated scenario.
+//
+// Reports: Metric size in KB retrieved from the registry.
+func Benchmark_ConditionMetricsRecorder_PrometheusMemorySize(b *testing.B) {
+	reg := prometheus.NewRegistry()
+	_ = createBenchmarkScenario(b, reg)
+
+	b.ReportAllocs()
+	b.ResetTimer()
+	b.ReportMetric(float64(maxCardinality), "series/op")
+
+	mfs, err := reg.Gather()
+	if err != nil {
+		b.Fatalf("gather: %v", err)
+	}
+	var buf bytes.Buffer
+	for _, mf := range mfs {
+		// A failed encode would leave buf short and report a wrong size,
+		// so do not discard the error.
+		if _, err := expfmt.MetricFamilyToText(&buf, mf); err != nil {
+			b.Fatalf("encode metric family %q: %v", mf.GetName(), err)
+		}
+	}
+	sizeKB := float64(buf.Len()) / 1024.0
+
+	b.ReportMetric(sizeKB, "KB")
+}
diff --git a/pkg/crd-condition-metrics/metrics_test.go b/pkg/crd-condition-metrics/metrics_test.go
new file mode 100644
index 0000000..0abd9b4
--- /dev/null
+++ b/pkg/crd-condition-metrics/metrics_test.go
@@ -0,0 +1,131 @@
+package crd_condition_metrics
+
+import (
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/testutil"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// makeObj builds a FakeObject with the given name and namespace.
+func makeObj(name, namespace string) *FakeObject {
+	return &FakeObject{
+		Name:      name,
+		Namespace: namespace,
+	}
+}
+
+// Recording a condition twice must leave only the latest (status, reason) for
+// that condition type, while a second condition type gets its own series.
+func TestConditionMetricRecorder_Record_Transition_And_SecondCondition(t *testing.T) {
+	gauge := NewOperatorConditionsGauge("test_record_transition_and_second_condition")
+	reg := prometheus.NewRegistry()
+	// Without a registered gauge the gather below is empty and the failure
+	// would be reported as a confusing output diff.
+	require.NoError(t, reg.Register(gauge))
+
+	// Arrange
+	rec := &ConditionMetricRecorder{
+		Controller:              "my-controller",
+		OperatorConditionsGauge: gauge,
+	}
+	kind := "MyCRD"
+	name := "cr-1"
+	ns := "prod"
+	transitionTime := time.Date(2025, time.January, 1, 0, 0, 0, 0, time.UTC)
+
+	obj := makeObj(name, ns)
+
+	// Record Ready=True
+	rec.RecordConditionFor(kind, obj, "Ready", "True", "", transitionTime)
+
+	// Flip Ready -> False with reason
+	rec.RecordConditionFor(kind, obj, "Ready", "False", "Failed", transitionTime)
+
+	// Another condition Synchronized=True (independent group)
+	rec.RecordConditionFor(kind, obj, "Synchronized", "True", "", transitionTime)
+
+	// Expect exactly two series, Ready=False (reason "Failed") and
+	// Synchronized=True, each valued with the transition time's unix timestamp.
+	want := `
+# HELP test_record_transition_and_second_condition_controller_condition Condition status for a custom resource; one time series per (custom resource, condition type) combination.
+# TYPE test_record_transition_and_second_condition_controller_condition gauge
+test_record_transition_and_second_condition_controller_condition{condition="Ready",controller="my-controller",id="prod/cr-1",kind="MyCRD",name="cr-1",namespace="prod",reason="Failed",status="False"} 1735689600
+test_record_transition_and_second_condition_controller_condition{condition="Synchronized",controller="my-controller",id="prod/cr-1",kind="MyCRD",name="cr-1",namespace="prod",reason="",status="True"} 1735689600
+`
+	require.NoError(t,
+		testutil.GatherAndCompare(
+			reg,
+			strings.NewReader(want),
+			"test_record_transition_and_second_condition_controller_condition",
+		),
+	)
+
+	removed := rec.RemoveConditionsFor(kind, obj)
+	assert.Equal(t, 2, removed)
+}
+
+// RemoveConditionsFor must delete every condition series of the object and
+// report how many it removed.
+func TestConditionMetricRecorder_RemoveConditionsFor(t *testing.T) {
+	gauge := NewOperatorConditionsGauge("test_remove_conditions_for_condition")
+	reg := prometheus.NewRegistry()
+	require.NoError(t, reg.Register(gauge))
+	// Arrange
+	rec := &ConditionMetricRecorder{
+		Controller:              "my-controller",
+		OperatorConditionsGauge: gauge,
+	}
+	kind := "MyCRD"
+	name := "cr-2"
+	ns := "staging"
+	transitionTime := time.Date(2025, time.January, 1, 0, 0, 0, 0, time.UTC)
+	obj := makeObj(name, ns)
+
+	rec.RecordConditionFor(kind, obj, "Ready", "True", "", transitionTime)
+	rec.RecordConditionFor(kind, obj, "Synchronized", "False", "SyncPending", transitionTime)
+
+	// Remove all condition series for this object
+	removed := rec.RemoveConditionsFor(kind, obj)
+	assert.Equal(t, 2, removed)
+
+	// No series remain for this object
+	require.NoError(t,
+		testutil.GatherAndCompare(
+			reg,
+			strings.NewReader(""),
+			"test_remove_conditions_for_condition_controller_condition",
+		),
+	)
+}
+
+// The kind argument handed to RecordConditionFor must end up in the "kind"
+// label of the recorded series.
+func TestConditionMetricRecorder_SetsKindLabelFromObject(t *testing.T) {
+	gauge := NewOperatorConditionsGauge("test_sets_kind_label_from_object")
+	reg := prometheus.NewRegistry()
+	require.NoError(t, reg.Register(gauge))
+	ctrl := "my-controller"
+	rec := &ConditionMetricRecorder{
+		Controller:              ctrl,
+		OperatorConditionsGauge: gauge,
+	}
+	kind := "FancyKind"
+	name := "obj-1"
+	ns := "ns-1"
+	transitionTime := time.Date(2025, time.January, 1, 0, 0, 0, 0, time.UTC)
+	obj := makeObj(name, ns)
+
+	// Record a condition
+	rec.RecordConditionFor(kind, obj, "Ready", "True", "", transitionTime)
+
+	// Expect the 'kind' label to carry the kind passed to RecordConditionFor
+	want := `
+# HELP test_sets_kind_label_from_object_controller_condition Condition status for a custom resource; one time series per (custom resource, condition type) combination.
+# TYPE test_sets_kind_label_from_object_controller_condition gauge
+test_sets_kind_label_from_object_controller_condition{condition="Ready",controller="my-controller",id="ns-1/obj-1",kind="FancyKind",name="obj-1",namespace="ns-1",reason="",status="True"} 1735689600
+`
+	require.NoError(t,
+		testutil.GatherAndCompare(
+			reg,
+			strings.NewReader(want),
+			"test_sets_kind_label_from_object_controller_condition",
+		),
+	)
+
+	assert.Equal(t, 1, gauge.DeleteByIndex(ctrl, kind, name, ns))
+}
diff --git a/renovate.json b/renovate.json
new file mode 100644
index 0000000..7c846a7
--- /dev/null
+++ b/renovate.json
@@ -0,0 +1,7 @@
+{
+  "$schema": "https://docs.renovatebot.com/renovate-schema.json",
+  "extends": [
+    "config:recommended"
+  ],
+  "ignorePaths": []
+}
\ No newline at end of file