From 6495f80f89e0618b06315e00f9cb6aa4520be406 Mon Sep 17 00:00:00 2001
From: RainbowMango <qdurenhongcai@gmail.com>
Date: Tue, 16 Aug 2022 21:13:48 +0800
Subject: [PATCH] enable node pod list only when needed for better performance

Signed-off-by: RainbowMango <qdurenhongcai@gmail.com>
---
 cmd/agent/app/agent.go                        |  1 +
 cmd/agent/app/options/options.go              |  4 ++
 cmd/controller-manager/app/options/options.go |  5 +-
 pkg/controllers/context/context.go            |  5 +-
 .../status/cluster_status_controller.go       | 48 ++++++++++---------
 5 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/cmd/agent/app/agent.go b/cmd/agent/app/agent.go
index b875c6bd8504..40b54ed38f4f 100644
--- a/cmd/agent/app/agent.go
+++ b/cmd/agent/app/agent.go
@@ -225,6 +225,7 @@ func setupControllers(mgr controllerruntime.Manager, opts *options.Options, stop
 			ClusterAPIBurst:                   opts.ClusterAPIBurst,
 			ConcurrentWorkSyncs:               opts.ConcurrentWorkSyncs,
 			RateLimiterOptions:                opts.RateLimiterOpts,
+			EnableClusterResourceModeling:     opts.EnableClusterResourceModeling,
 		},
 		StopChan:            stopChan,
 		ResourceInterpreter: resourceInterpreter,
diff --git a/cmd/agent/app/options/options.go b/cmd/agent/app/options/options.go
index bcee5cefd7da..b9ffb891ae57 100644
--- a/cmd/agent/app/options/options.go
+++ b/cmd/agent/app/options/options.go
@@ -108,6 +108,10 @@ type Options struct {
 	// ClusterRegion represents the region of the cluster locate in.
 	ClusterRegion string
 
+	// EnableClusterResourceModeling indicates whether cluster resource modeling is enabled.
+	// The resource modeling might be used by the scheduler to make scheduling decisions
+	// in scenarios of dynamic replica assignment based on the free resources of clusters.
+	// Disable it for better performance if it does not fit your use case.
 	EnableClusterResourceModeling bool
 }
 
diff --git a/cmd/controller-manager/app/options/options.go b/cmd/controller-manager/app/options/options.go
index acd9aab518db..a29b218a4ba0 100644
--- a/cmd/controller-manager/app/options/options.go
+++ b/cmd/controller-manager/app/options/options.go
@@ -120,7 +120,10 @@ type Options struct {
 
 	RateLimiterOpts ratelimiterflag.Options
 	ProfileOpts     profileflag.Options
-
+	// EnableClusterResourceModeling indicates whether cluster resource modeling is enabled.
+	// The resource modeling might be used by the scheduler to make scheduling decisions
+	// in scenarios of dynamic replica assignment based on the free resources of clusters.
+	// Disable it for better performance if it does not fit your use case.
 	EnableClusterResourceModeling bool
 }
 
diff --git a/pkg/controllers/context/context.go b/pkg/controllers/context/context.go
index 90d2e83fff67..52448e0c1dbb 100644
--- a/pkg/controllers/context/context.go
+++ b/pkg/controllers/context/context.go
@@ -64,7 +64,10 @@ type Options struct {
 	// GracefulEvictionTimeout is the timeout period waiting for the grace-eviction-controller performs the final
 	// removal since the workload(resource) has been moved to the graceful eviction tasks.
 	GracefulEvictionTimeout metav1.Duration
-
+	// EnableClusterResourceModeling indicates whether cluster resource modeling is enabled.
+	// The resource modeling might be used by the scheduler to make scheduling decisions
+	// in scenarios of dynamic replica assignment based on the free resources of clusters.
+	// Disable it for better performance if it does not fit your use case.
 	EnableClusterResourceModeling bool
 }
 
diff --git a/pkg/controllers/status/cluster_status_controller.go b/pkg/controllers/status/cluster_status_controller.go
index b6395f450bd8..7059b487c596 100644
--- a/pkg/controllers/status/cluster_status_controller.go
+++ b/pkg/controllers/status/cluster_status_controller.go
@@ -92,6 +92,10 @@ type ClusterStatusController struct {
 	ClusterCacheSyncTimeout metav1.Duration
 	RateLimiterOptions      ratelimiterflag.Options
 
+	// EnableClusterResourceModeling indicates whether cluster resource modeling is enabled.
+	// The resource modeling might be used by the scheduler to make scheduling decisions
+	// in scenarios of dynamic replica assignment based on the free resources of clusters.
+	// Disable it for better performance if it does not fit your use case.
 	EnableClusterResourceModeling bool
 }
 
@@ -172,16 +176,7 @@ func (c *ClusterStatusController) syncClusterStatus(cluster *clusterv1alpha1.Clu
 	}
 
 	// skip collecting cluster status if not ready
-	if online && healthy && readyCondition.Status == metav1.ConditionTrue && c.EnableClusterResourceModeling {
-		// get or create informer for pods and nodes in member cluster
-		clusterInformerManager, err := c.buildInformerForCluster(clusterClient)
-		if err != nil {
-			klog.Errorf("Failed to get or create informer for Cluster %s. Error: %v.", cluster.GetName(), err)
-			// in large-scale clusters, the timeout may occur.
-			// if clusterInformerManager fails to be built, should be returned, otherwise, it may cause a nil pointer
-			return controllerruntime.Result{Requeue: true}, err
-		}
-
+	if online && healthy && readyCondition.Status == metav1.ConditionTrue {
 		if cluster.Spec.SyncMode == clusterv1alpha1.Pull {
 			// init the lease controller for pull mode clusters
 			c.initLeaseController(cluster)
@@ -191,6 +186,7 @@ func (c *ClusterStatusController) syncClusterStatus(cluster *clusterv1alpha1.Clu
 		if err != nil {
 			klog.Errorf("Failed to get Kubernetes version for Cluster %s. Error: %v.", cluster.GetName(), err)
 		}
+		currentClusterStatus.KubernetesVersion = clusterVersion
 
 		// get the list of APIs installed in the member cluster
 		apiEnables, err := getAPIEnablements(clusterClient)
@@ -199,21 +195,29 @@ func (c *ClusterStatusController) syncClusterStatus(cluster *clusterv1alpha1.Clu
 		} else if err != nil {
 			klog.Warningf("Maybe get partial(%d) APIs installed in Cluster %s. Error: %v.", len(apiEnables), cluster.GetName(), err)
 		}
+		currentClusterStatus.APIEnablements = apiEnables
 
-		nodes, err := listNodes(clusterInformerManager)
-		if err != nil {
-			klog.Errorf("Failed to list nodes for Cluster %s. Error: %v.", cluster.GetName(), err)
-		}
+		if c.EnableClusterResourceModeling {
+			// get or create informer for pods and nodes in member cluster
+			clusterInformerManager, err := c.buildInformerForCluster(clusterClient)
+			if err != nil {
+				klog.Errorf("Failed to get or create informer for Cluster %s. Error: %v.", cluster.GetName(), err)
+				// In large-scale clusters, building the informer may time out.
+				// If clusterInformerManager fails to be built, return here; otherwise using it may cause a nil pointer dereference.
+				return controllerruntime.Result{Requeue: true}, err
+			}
+			nodes, err := listNodes(clusterInformerManager)
+			if err != nil {
+				klog.Errorf("Failed to list nodes for Cluster %s. Error: %v.", cluster.GetName(), err)
+			}
 
-		pods, err := listPods(clusterInformerManager)
-		if err != nil {
-			klog.Errorf("Failed to list pods for Cluster %s. Error: %v.", cluster.GetName(), err)
+			pods, err := listPods(clusterInformerManager)
+			if err != nil {
+				klog.Errorf("Failed to list pods for Cluster %s. Error: %v.", cluster.GetName(), err)
+			}
+			currentClusterStatus.NodeSummary = getNodeSummary(nodes)
+			currentClusterStatus.ResourceSummary = getResourceSummary(nodes, pods)
 		}
-
-		currentClusterStatus.KubernetesVersion = clusterVersion
-		currentClusterStatus.APIEnablements = apiEnables
-		currentClusterStatus.NodeSummary = getNodeSummary(nodes)
-		currentClusterStatus.ResourceSummary = getResourceSummary(nodes, pods)
 	}
 
 	setTransitionTime(currentClusterStatus.Conditions, readyCondition)