diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 0ba954623..944a8f2c9 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -176,7 +176,7 @@ func (m *CertificateApprover) Reconcile(ctx context.Context, req ctrl.Request) ( func reconcileLimits(csrName string, machines []machinehandlerpkg.Machine, nodes *corev1.NodeList, csrs *certificatesv1.CertificateSigningRequestList) bool { maxPending := getMaxPending(machines, nodes) atomic.StoreUint32(&MaxPendingCSRs, uint32(maxPending)) - pending := recentlyPendingCSRs(csrs.Items) + pending := recentlyPendingNodeCSRs(csrs.Items) atomic.StoreUint32(&PendingCSRs, uint32(pending)) if pending > maxPending { klog.Errorf("%v: Pending CSRs: %d; Max pending allowed: %d. Difference between pending CSRs and machines > %v. Ignoring all CSRs as too many recent pending CSRs seen", csrName, pending, maxPending, maxDiffBetweenPendingCSRsAndMachinesCount) diff --git a/pkg/controller/csr_check.go b/pkg/controller/csr_check.go index f13e50ef3..3ac273561 100644 --- a/pkg/controller/csr_check.go +++ b/pkg/controller/csr_check.go @@ -496,7 +496,7 @@ func isApprovedByCMA(csr certificatesv1.CertificateSigningRequest) bool { return false } -func recentlyPendingCSRs(csrs []certificatesv1.CertificateSigningRequest) int { +func recentlyPendingNodeCSRs(csrs []certificatesv1.CertificateSigningRequest) int { // assumes we are scheduled on the master meaning our clock is the same currentTime := now() start := currentTime.Add(-maxPendingDelta) @@ -510,7 +510,7 @@ func recentlyPendingCSRs(csrs []certificatesv1.CertificateSigningRequest) int { continue } - if !isApproved(csr) { + if (isReqFromNodeBootstrapper(&csr) || isRequestFromNodeUser(csr)) && !isApproved(csr) { pending++ } } @@ -518,6 +518,10 @@ func recentlyPendingCSRs(csrs []certificatesv1.CertificateSigningRequest) int { return pending } +func isRequestFromNodeUser(csr certificatesv1.CertificateSigningRequest) bool { + return strings.HasPrefix(csr.Spec.Username, nodeUserPrefix) +} + // getServingCert fetches the node by the given name and attempts to connect to // its kubelet on the first advertised address. // diff --git a/pkg/controller/csr_check_test.go b/pkg/controller/csr_check_test.go index 01c19830f..ba93baa04 100644 --- a/pkg/controller/csr_check_test.go +++ b/pkg/controller/csr_check_test.go @@ -1924,14 +1924,29 @@ func TestGetServingCert(t *testing.T) { } } -func TestRecentlyPendingCSRs(t *testing.T) { - approvedCSR := certificatesv1.CertificateSigningRequest{ +func TestRecentlyPendingNodeBootstrapperCSRs(t *testing.T) { + approvedNodeBootstrapperCSR := certificatesv1.CertificateSigningRequest{ + Spec: certificatesv1.CertificateSigningRequestSpec{ + Username: nodeBootstrapperUsername, + Groups: nodeBootstrapperGroups.List(), + }, Status: certificatesv1.CertificateSigningRequestStatus{ Conditions: []certificatesv1.CertificateSigningRequestCondition{{ Type: certificatesv1.CertificateApproved, }}, }, } + pendingNodeBootstrapperCSR := certificatesv1.CertificateSigningRequest{ + Spec: certificatesv1.CertificateSigningRequestSpec{ + Username: nodeBootstrapperUsername, + Groups: nodeBootstrapperGroups.List(), + }, + } + pendingNodeServerCSR := certificatesv1.CertificateSigningRequest{ + Spec: certificatesv1.CertificateSigningRequestSpec{ + Username: nodeUserPrefix + "clustername-abcde-master-us-west-1a-0", + }, + } pendingCSR := certificatesv1.CertificateSigningRequest{} pendingTime := baseTime.Add(time.Second) pastApprovalTime := baseTime.Add(-maxPendingDelta) @@ -1948,44 +1963,56 @@ func TestRecentlyPendingCSRs(t *testing.T) { expectPending int }{ { - name: "recently pending csr", - csrs: []certificatesv1.CertificateSigningRequest{createdAt(pendingTime, pendingCSR)}, + name: "recently pending Node bootstrapper csr", + csrs: []certificatesv1.CertificateSigningRequest{createdAt(pendingTime, pendingNodeBootstrapperCSR)}, + expectPending: 1, + }, + { + name: "recently pending Node csr", + csrs: []certificatesv1.CertificateSigningRequest{createdAt(pendingTime, pendingNodeServerCSR)}, expectPending: 1, }, + { + name: "recently pending unknown csr", + csrs: []certificatesv1.CertificateSigningRequest{createdAt(pendingTime, pendingCSR)}, + expectPending: 0, + }, { name: "recently approved csr", - csrs: []certificatesv1.CertificateSigningRequest{createdAt(pendingTime, approvedCSR)}, + csrs: []certificatesv1.CertificateSigningRequest{createdAt(pendingTime, approvedNodeBootstrapperCSR)}, expectPending: 0, }, { name: "pending past approval time", - csrs: []certificatesv1.CertificateSigningRequest{createdAt(pastApprovalTime, pendingCSR)}, + csrs: []certificatesv1.CertificateSigningRequest{createdAt(pastApprovalTime, pendingNodeBootstrapperCSR)}, expectPending: 0, }, { name: "pending before approval time", - csrs: []certificatesv1.CertificateSigningRequest{createdAt(preApprovalTime, pendingCSR)}, + csrs: []certificatesv1.CertificateSigningRequest{createdAt(preApprovalTime, pendingNodeBootstrapperCSR)}, expectPending: 0, }, { name: "multiple different csrs", csrs: []certificatesv1.CertificateSigningRequest{ - createdAt(pendingTime, pendingCSR), - createdAt(pendingTime, pendingCSR), + createdAt(pendingTime, pendingNodeBootstrapperCSR), + createdAt(pendingTime, pendingNodeBootstrapperCSR), + createdAt(pendingTime, pendingNodeServerCSR), - createdAt(pendingTime, approvedCSR), - createdAt(preApprovalTime, approvedCSR), - createdAt(pastApprovalTime, approvedCSR), - createdAt(preApprovalTime, pendingCSR), - createdAt(pastApprovalTime, pendingCSR), + createdAt(pendingTime, pendingCSR), + createdAt(pendingTime, approvedNodeBootstrapperCSR), + createdAt(preApprovalTime, approvedNodeBootstrapperCSR), + createdAt(pastApprovalTime, approvedNodeBootstrapperCSR), + createdAt(preApprovalTime, pendingNodeBootstrapperCSR), + createdAt(pastApprovalTime, pendingNodeBootstrapperCSR), }, - expectPending: 2, + expectPending: 3, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if pending := recentlyPendingCSRs(tt.csrs); pending != tt.expectPending { + if pending := recentlyPendingNodeCSRs(tt.csrs); pending != tt.expectPending { t.Errorf("Expected %v pending CSRs, got: %v", tt.expectPending, pending) } }) diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 113858473..4ff5b3638 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -13,10 +13,10 @@ import ( const DefaultMetricsPort = ":9191" var ( - // CurrentPendingCSRCountDesc is a metric to report count of the pending csr in the cluster - CurrentPendingCSRCountDesc = prometheus.NewDesc("mapi_current_pending_csr", "Count of pending CSRs at the cluster level", nil, nil) - // MaxPendingCSRDesc is a metric to report threshold value of the pending csr beyond which csr will be ignored - MaxPendingCSRDesc = prometheus.NewDesc("mapi_max_pending_csr", "Threshold value of the pending CSRs beyond which any new CSR requests will be ignored ", nil, nil) + // CurrentPendingCSRCountDesc is a metric to report count of pending node CSRs in the cluster + CurrentPendingCSRCountDesc = prometheus.NewDesc("mapi_current_pending_csr", "Count of recently pending node CSRs at the cluster level", nil, nil) + // MaxPendingCSRDesc is a metric to report threshold value of the pending node CSRs beyond which all CSR will be ignored by machine approver + MaxPendingCSRDesc = prometheus.NewDesc("mapi_max_pending_csr", "Threshold value of the pending node CSRs beyond which all CSR will be ignored by machine approver", nil, nil) ) func init() {