diff --git a/docs/user-guide/metrics.md b/docs/user-guide/metrics.md
new file mode 100644
index 000000000..87bb2cb74
--- /dev/null
+++ b/docs/user-guide/metrics.md
@@ -0,0 +1,27 @@
+# Metrics
+
+The metrics endpoint is exposed on `:8080` by default (customizable with the `-metrics-addr` flag).
+
+## Metric List
+
+### `hnc_hierarchicalresourcequota`
+
+This metric exposes resource limits and usage for HierarchicalResourceQuotas. Values are reported in base units (e.g., cores for `cpu`, bytes for `memory`).
+
+#### Labels
+
+- `hrq`: Name of the HierarchicalResourceQuota
+- `namespace`: Namespace of the HierarchicalResourceQuota
+- `resource`: Resource type (e.g., `cpu`, `memory`, `pods`)
+- `type`: Either `hard` (limit) or `used` (current usage)
+
+#### Example
+
+```text
+# HELP hnc_hierarchicalresourcequota HRQ hard/used like kube_resourcequota
+# TYPE hnc_hierarchicalresourcequota gauge
+hnc_hierarchicalresourcequota{hrq="team-quota",namespace="team-a",resource="cpu",type="hard"} 100
+hnc_hierarchicalresourcequota{hrq="team-quota",namespace="team-a",resource="cpu",type="used"} 45
+hnc_hierarchicalresourcequota{hrq="team-quota",namespace="team-a",resource="memory",type="hard"} 536870912
+hnc_hierarchicalresourcequota{hrq="team-quota",namespace="team-a",resource="memory",type="used"} 268435456
+```
diff --git a/internal/hrq/metrics.go b/internal/hrq/metrics.go
new file mode 100644
index 000000000..1180362da
--- /dev/null
+++ b/internal/hrq/metrics.go
@@ -0,0 +1,96 @@
+package hrq
+
+import (
+	"context"
+	"time"
+
+	"github.com/go-logr/logr"
+	"github.com/prometheus/client_golang/prometheus"
+	corev1 "k8s.io/api/core/v1"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/metrics"
+
+	api "sigs.k8s.io/hierarchical-namespaces/api/v1alpha2"
+)
+
+// hrqListTimeout bounds how long a single scrape may spend listing HRQs.
+const hrqListTimeout = 10 * time.Second
+
+// hrqDesc describes the hnc_hierarchicalresourcequota gauge. It is built once
+// and shared by Describe and Collect instead of being re-created per sample.
+var hrqDesc = prometheus.NewDesc(
+	"hnc_hierarchicalresourcequota",
+	"HRQ hard/used like kube_resourcequota",
+	[]string{"namespace", "hrq", "resource", "type"},
+	nil,
+)
+
+// Compile-time check that hrqCollector satisfies prometheus.Collector.
+var _ prometheus.Collector = (*hrqCollector)(nil)
+
+// RegisterMetrics registers a collector with the controller-runtime metrics
+// registry that reports the hard limits and current usage of the
+// HierarchicalResourceQuotas listed through mgr's client. It returns the
+// error reported by the registry, if any.
+func RegisterMetrics(mgr ctrl.Manager) error {
+	return metrics.Registry.Register(&hrqCollector{
+		client:  mgr.GetClient(),
+		logger:  mgr.GetLogger().WithValues("collector", "hrqCollector"),
+		timeout: hrqListTimeout,
+	})
+}
+
+// hrqCollector lists HRQs on every scrape and emits one gauge sample per
+// (HRQ, resource, hard|used) combination.
+type hrqCollector struct {
+	timeout time.Duration // upper bound for the List call made by Collect
+	client  client.Client
+	logger  logr.Logger
+}
+
+// Describe implements prometheus.Collector.
+func (c *hrqCollector) Describe(ch chan<- *prometheus.Desc) {
+	ch <- hrqDesc
+}
+
+// Collect implements prometheus.Collector. A failed List is logged and no
+// samples are emitted for that scrape.
+func (c *hrqCollector) Collect(ch chan<- prometheus.Metric) {
+	ctx, cancel := context.WithTimeout(context.Background(), c.timeout)
+	defer cancel()
+
+	var hrqs api.HierarchicalResourceQuotaList
+	if err := c.client.List(ctx, &hrqs); err != nil {
+		c.logger.Error(err, "Failed to list HRQs during metrics collection")
+		return
+	}
+
+	// Index instead of ranging by value to avoid copying each HRQ.
+	for i := range hrqs.Items {
+		q := &hrqs.Items[i]
+		c.emit(ch, q, "hard", q.Status.Hard)
+		c.emit(ch, q, "used", q.Status.Used)
+	}
+}
+
+// emit sends one gauge sample per resource in list, labelled with typeLabel.
+func (c *hrqCollector) emit(ch chan<- prometheus.Metric, q *api.HierarchicalResourceQuota, typeLabel string, list corev1.ResourceList) {
+	for res, qty := range list {
+		m, err := prometheus.NewConstMetric(
+			hrqDesc,
+			prometheus.GaugeValue,
+			qty.AsApproximateFloat64(),
+			q.Namespace,
+			q.Name,
+			string(res),
+			typeLabel,
+		)
+		if err != nil {
+			// Skip the bad sample instead of panicking inside a scrape.
+			c.logger.Error(err, "Failed to build HRQ metric", "namespace", q.Namespace, "hrq", q.Name, "resource", res)
+			continue
+		}
+		ch <- m
+	}
+}
diff --git a/internal/setup/reconcilers.go b/internal/setup/reconcilers.go
index 7b1419992..133f929d5 100644
--- a/internal/setup/reconcilers.go
+++ b/internal/setup/reconcilers.go
@@ -109,6 +109,10 @@ func CreateReconcilers(mgr ctrl.Manager, f *forest.Forest, opts Options) error {
 		if opts.HRQSyncInterval != 0 {
 			go watchHRQDrift(f, opts.HRQSyncInterval, hrqr)
 		}
+
+		if err := hrq.RegisterMetrics(mgr); err != nil {
+			return fmt.Errorf("cannot register HRQ metrics: %w", err)
+		}
 	}
 
 	if err := ar.SetupWithManager(mgr); err != nil {