From be7dcbb902f8941a3c4d4fbc09a428472f98c8b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radek=20Ma=C5=88=C3=A1k?= Date: Tue, 7 Nov 2023 10:40:31 +0100 Subject: [PATCH] Filter non node bootstrapper CSR in metrics CSRs not created by node bootstrapper are correctly filtered out in the reconciler, but this filtering was not performed when reporting metrics. This PR adds the necessary filtering and clarifies what the metrics report. --- pkg/controller/controller.go | 2 +- pkg/controller/csr_check.go | 8 +++-- pkg/controller/csr_check_test.go | 59 +++++++++++++++++++++++--------- pkg/metrics/metrics.go | 8 ++--- 4 files changed, 54 insertions(+), 23 deletions(-) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 0ba954623..944a8f2c9 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -176,7 +176,7 @@ func (m *CertificateApprover) Reconcile(ctx context.Context, req ctrl.Request) ( func reconcileLimits(csrName string, machines []machinehandlerpkg.Machine, nodes *corev1.NodeList, csrs *certificatesv1.CertificateSigningRequestList) bool { maxPending := getMaxPending(machines, nodes) atomic.StoreUint32(&MaxPendingCSRs, uint32(maxPending)) - pending := recentlyPendingCSRs(csrs.Items) + pending := recentlyPendingNodeCSRs(csrs.Items) atomic.StoreUint32(&PendingCSRs, uint32(pending)) if pending > maxPending { klog.Errorf("%v: Pending CSRs: %d; Max pending allowed: %d. Difference between pending CSRs and machines > %v. Ignoring all CSRs as too many recent pending CSRs seen", csrName, pending, maxPending, maxDiffBetweenPendingCSRsAndMachinesCount) diff --git a/pkg/controller/csr_check.go b/pkg/controller/csr_check.go index f13e50ef3..3ac273561 100644 --- a/pkg/controller/csr_check.go +++ b/pkg/controller/csr_check.go @@ -496,7 +496,7 @@ func isApprovedByCMA(csr certificatesv1.CertificateSigningRequest) bool { return false } -func recentlyPendingCSRs(csrs []certificatesv1.CertificateSigningRequest) int { +func recentlyPendingNodeCSRs(csrs []certificatesv1.CertificateSigningRequest) int { // assumes we are scheduled on the master meaning our clock is the same currentTime := now() start := currentTime.Add(-maxPendingDelta) @@ -510,7 +510,7 @@ func recentlyPendingCSRs(csrs []certificatesv1.CertificateSigningRequest) int { continue } - if !isApproved(csr) { + if (isReqFromNodeBootstrapper(&csr) || isRequestFromNodeUser(csr)) && !isApproved(csr) { pending++ } } @@ -518,6 +518,10 @@ func recentlyPendingCSRs(csrs []certificatesv1.CertificateSigningRequest) int { return pending } +func isRequestFromNodeUser(csr certificatesv1.CertificateSigningRequest) bool { + return strings.HasPrefix(csr.Spec.Username, nodeUserPrefix) +} + // getServingCert fetches the node by the given name and attempts to connect to // its kubelet on the first advertised address. // diff --git a/pkg/controller/csr_check_test.go b/pkg/controller/csr_check_test.go index 01c19830f..ba93baa04 100644 --- a/pkg/controller/csr_check_test.go +++ b/pkg/controller/csr_check_test.go @@ -1924,14 +1924,29 @@ func TestGetServingCert(t *testing.T) { } } -func TestRecentlyPendingCSRs(t *testing.T) { - approvedCSR := certificatesv1.CertificateSigningRequest{ +func TestRecentlyPendingNodeBootstrapperCSRs(t *testing.T) { + approvedNodeBootstrapperCSR := certificatesv1.CertificateSigningRequest{ + Spec: certificatesv1.CertificateSigningRequestSpec{ + Username: nodeBootstrapperUsername, + Groups: nodeBootstrapperGroups.List(), + }, Status: certificatesv1.CertificateSigningRequestStatus{ Conditions: []certificatesv1.CertificateSigningRequestCondition{{ Type: certificatesv1.CertificateApproved, }}, }, } + pendingNodeBootstrapperCSR := certificatesv1.CertificateSigningRequest{ + Spec: certificatesv1.CertificateSigningRequestSpec{ + Username: nodeBootstrapperUsername, + Groups: nodeBootstrapperGroups.List(), + }, + } + pendingNodeServerCSR := certificatesv1.CertificateSigningRequest{ + Spec: certificatesv1.CertificateSigningRequestSpec{ + Username: nodeUserPrefix + "clustername-abcde-master-us-west-1a-0", + }, + } pendingCSR := certificatesv1.CertificateSigningRequest{} pendingTime := baseTime.Add(time.Second) pastApprovalTime := baseTime.Add(-maxPendingDelta) @@ -1948,44 +1963,56 @@ func TestRecentlyPendingCSRs(t *testing.T) { expectPending int }{ { - name: "recently pending csr", - csrs: []certificatesv1.CertificateSigningRequest{createdAt(pendingTime, pendingCSR)}, + name: "recently pending Node bootstrapper csr", + csrs: []certificatesv1.CertificateSigningRequest{createdAt(pendingTime, pendingNodeBootstrapperCSR)}, + expectPending: 1, + }, + { + name: "recently pending Node csr", + csrs: []certificatesv1.CertificateSigningRequest{createdAt(pendingTime, pendingNodeServerCSR)}, expectPending: 1, }, + { + name: "recently pending unknown csr", + csrs: []certificatesv1.CertificateSigningRequest{createdAt(pendingTime, pendingCSR)}, + expectPending: 0, + }, { name: "recently approved csr", - csrs: []certificatesv1.CertificateSigningRequest{createdAt(pendingTime, approvedCSR)}, + csrs: []certificatesv1.CertificateSigningRequest{createdAt(pendingTime, approvedNodeBootstrapperCSR)}, expectPending: 0, }, { name: "pending past approval time", - csrs: []certificatesv1.CertificateSigningRequest{createdAt(pastApprovalTime, pendingCSR)}, + csrs: []certificatesv1.CertificateSigningRequest{createdAt(pastApprovalTime, pendingNodeBootstrapperCSR)}, expectPending: 0, }, { name: "pending before approval time", - csrs: []certificatesv1.CertificateSigningRequest{createdAt(preApprovalTime, pendingCSR)}, + csrs: []certificatesv1.CertificateSigningRequest{createdAt(preApprovalTime, pendingNodeBootstrapperCSR)}, expectPending: 0, }, { name: "multiple different csrs", csrs: []certificatesv1.CertificateSigningRequest{ - createdAt(pendingTime, pendingCSR), - createdAt(pendingTime, pendingCSR), + createdAt(pendingTime, pendingNodeBootstrapperCSR), + createdAt(pendingTime, pendingNodeBootstrapperCSR), + createdAt(pendingTime, pendingNodeServerCSR), - createdAt(pendingTime, approvedCSR), - createdAt(preApprovalTime, approvedCSR), - createdAt(pastApprovalTime, approvedCSR), - createdAt(preApprovalTime, pendingCSR), - createdAt(pastApprovalTime, pendingCSR), + createdAt(pendingTime, pendingCSR), + createdAt(pendingTime, approvedNodeBootstrapperCSR), + createdAt(preApprovalTime, approvedNodeBootstrapperCSR), + createdAt(pastApprovalTime, approvedNodeBootstrapperCSR), + createdAt(preApprovalTime, pendingNodeBootstrapperCSR), + createdAt(pastApprovalTime, pendingNodeBootstrapperCSR), }, - expectPending: 2, + expectPending: 3, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if pending := recentlyPendingCSRs(tt.csrs); pending != tt.expectPending { + if pending := recentlyPendingNodeCSRs(tt.csrs); pending != tt.expectPending { t.Errorf("Expected %v pending CSRs, got: %v", tt.expectPending, pending) } }) diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 113858473..4ff5b3638 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -13,10 +13,10 @@ import ( const DefaultMetricsPort = ":9191" var ( - // CurrentPendingCSRCountDesc is a metric to report count of the pending csr in the cluster - CurrentPendingCSRCountDesc = prometheus.NewDesc("mapi_current_pending_csr", "Count of pending CSRs at the cluster level", nil, nil) - // MaxPendingCSRDesc is a metric to report threshold value of the pending csr beyond which csr will be ignored - MaxPendingCSRDesc = prometheus.NewDesc("mapi_max_pending_csr", "Threshold value of the pending CSRs beyond which any new CSR requests will be ignored ", nil, nil) + // CurrentPendingCSRCountDesc is a metric to report count of pending node CSRs in the cluster + CurrentPendingCSRCountDesc = prometheus.NewDesc("mapi_current_pending_csr", "Count of recently pending node CSRs at the cluster level", nil, nil) + // MaxPendingCSRDesc is a metric to report threshold value of the pending node CSRs beyond which all CSR will be ignored by machine approver + MaxPendingCSRDesc = prometheus.NewDesc("mapi_max_pending_csr", "Threshold value of the pending node CSRs beyond which all CSR will be ignored by machine approver", nil, nil) ) func init() {