Skip to content

Commit

Permalink
enable node pod list only when needed for better performance
Browse files Browse the repository at this point in the history
Signed-off-by: RainbowMango <qdurenhongcai@gmail.com>
  • Loading branch information
RainbowMango committed Aug 17, 2022
1 parent b3ca4c2 commit 6495f80
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 24 deletions.
1 change: 1 addition & 0 deletions cmd/agent/app/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ func setupControllers(mgr controllerruntime.Manager, opts *options.Options, stop
ClusterAPIBurst: opts.ClusterAPIBurst,
ConcurrentWorkSyncs: opts.ConcurrentWorkSyncs,
RateLimiterOptions: opts.RateLimiterOpts,
EnableClusterResourceModeling: opts.EnableClusterResourceModeling,
},
StopChan: stopChan,
ResourceInterpreter: resourceInterpreter,
Expand Down
4 changes: 4 additions & 0 deletions cmd/agent/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,10 @@ type Options struct {
// ClusterRegion represents the region in which the cluster is located.
ClusterRegion string

// EnableClusterResourceModeling indicates whether cluster resource modeling is enabled.
// The resource modeling might be used by the scheduler to make scheduling decisions
// in the scenario of dynamic replica assignment based on cluster free resources.
// Disable it for better performance if it does not fit your use case.
EnableClusterResourceModeling bool
}

Expand Down
5 changes: 4 additions & 1 deletion cmd/controller-manager/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,10 @@ type Options struct {

RateLimiterOpts ratelimiterflag.Options
ProfileOpts profileflag.Options

// EnableClusterResourceModeling indicates whether cluster resource modeling is enabled.
// The resource modeling might be used by the scheduler to make scheduling decisions
// in the scenario of dynamic replica assignment based on cluster free resources.
// Disable it for better performance if it does not fit your use case.
EnableClusterResourceModeling bool
}

Expand Down
5 changes: 4 additions & 1 deletion pkg/controllers/context/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@ type Options struct {
// GracefulEvictionTimeout is the timeout period to wait for the grace-eviction-controller to perform the final
// removal after the workload (resource) has been moved to the graceful eviction tasks.
GracefulEvictionTimeout metav1.Duration

// EnableClusterResourceModeling indicates whether cluster resource modeling is enabled.
// The resource modeling might be used by the scheduler to make scheduling decisions
// in the scenario of dynamic replica assignment based on cluster free resources.
// Disable it for better performance if it does not fit your use case.
EnableClusterResourceModeling bool
}

Expand Down
48 changes: 26 additions & 22 deletions pkg/controllers/status/cluster_status_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ type ClusterStatusController struct {
ClusterCacheSyncTimeout metav1.Duration
RateLimiterOptions ratelimiterflag.Options

// EnableClusterResourceModeling indicates whether cluster resource modeling is enabled.
// The resource modeling might be used by the scheduler to make scheduling decisions
// in the scenario of dynamic replica assignment based on cluster free resources.
// Disable it for better performance if it does not fit your use case.
EnableClusterResourceModeling bool
}

Expand Down Expand Up @@ -172,16 +176,7 @@ func (c *ClusterStatusController) syncClusterStatus(cluster *clusterv1alpha1.Clu
}

// skip collecting cluster status if not ready
if online && healthy && readyCondition.Status == metav1.ConditionTrue && c.EnableClusterResourceModeling {
// get or create informer for pods and nodes in member cluster
clusterInformerManager, err := c.buildInformerForCluster(clusterClient)
if err != nil {
klog.Errorf("Failed to get or create informer for Cluster %s. Error: %v.", cluster.GetName(), err)
// In large-scale clusters, a timeout may occur here.
// If clusterInformerManager fails to be built, return immediately; otherwise, using it may cause a nil pointer dereference.
return controllerruntime.Result{Requeue: true}, err
}

if online && healthy && readyCondition.Status == metav1.ConditionTrue {
if cluster.Spec.SyncMode == clusterv1alpha1.Pull {
// init the lease controller for pull mode clusters
c.initLeaseController(cluster)
Expand All @@ -191,6 +186,7 @@ func (c *ClusterStatusController) syncClusterStatus(cluster *clusterv1alpha1.Clu
if err != nil {
klog.Errorf("Failed to get Kubernetes version for Cluster %s. Error: %v.", cluster.GetName(), err)
}
currentClusterStatus.KubernetesVersion = clusterVersion

// get the list of APIs installed in the member cluster
apiEnables, err := getAPIEnablements(clusterClient)
Expand All @@ -199,21 +195,29 @@ func (c *ClusterStatusController) syncClusterStatus(cluster *clusterv1alpha1.Clu
} else if err != nil {
klog.Warningf("Maybe get partial(%d) APIs installed in Cluster %s. Error: %v.", len(apiEnables), cluster.GetName(), err)
}
currentClusterStatus.APIEnablements = apiEnables

nodes, err := listNodes(clusterInformerManager)
if err != nil {
klog.Errorf("Failed to list nodes for Cluster %s. Error: %v.", cluster.GetName(), err)
}
if c.EnableClusterResourceModeling {
// get or create informer for pods and nodes in member cluster
clusterInformerManager, err := c.buildInformerForCluster(clusterClient)
if err != nil {
klog.Errorf("Failed to get or create informer for Cluster %s. Error: %v.", cluster.GetName(), err)
// In large-scale clusters, a timeout may occur here.
// If clusterInformerManager fails to be built, return immediately; otherwise, using it may cause a nil pointer dereference.
return controllerruntime.Result{Requeue: true}, err
}
nodes, err := listNodes(clusterInformerManager)
if err != nil {
klog.Errorf("Failed to list nodes for Cluster %s. Error: %v.", cluster.GetName(), err)
}

pods, err := listPods(clusterInformerManager)
if err != nil {
klog.Errorf("Failed to list pods for Cluster %s. Error: %v.", cluster.GetName(), err)
pods, err := listPods(clusterInformerManager)
if err != nil {
klog.Errorf("Failed to list pods for Cluster %s. Error: %v.", cluster.GetName(), err)
}
currentClusterStatus.NodeSummary = getNodeSummary(nodes)
currentClusterStatus.ResourceSummary = getResourceSummary(nodes, pods)
}

currentClusterStatus.KubernetesVersion = clusterVersion
currentClusterStatus.APIEnablements = apiEnables
currentClusterStatus.NodeSummary = getNodeSummary(nodes)
currentClusterStatus.ResourceSummary = getResourceSummary(nodes, pods)
}

setTransitionTime(currentClusterStatus.Conditions, readyCondition)
Expand Down

0 comments on commit 6495f80

Please sign in to comment.