Consider handling the minResources when using volcano as a gang scheduler #535

tenzen-y · 2023-03-02T17:20:12Z

Since #520, the mpi-operator respects the .spec.runPolicy.schedulingPolicy when creating the PodGroup.
Currently, the mpi-operator just passes the .spec.runPolicy.schedulingPolicy.minResources to the .spec.minResources in PodGroup when using volcano as a gang scheduler.

However, we may want to calculate all required resources in Launcher and Workers considering priorityClasses and then pass it to .spec.minResources in PodGroup like the following:

mpi-operator/pkg/controller/podgroup.go

Lines 89 to 168 in 2bc2b65

    
           func (c *MPIJobController) calcPGMinResources( 
        
           	minMember int32, 
        
           	replicas map[kubeflow.MPIReplicaType]*common.ReplicaSpec, 
        
           ) *corev1.ResourceList { 
        
           	var ro replicasOrder 
        
           	for t, replica := range replicas { 
        
           		rp := replicaPriority{ 
        
           			priority:    0, 
        
           			ReplicaSpec: *replica, 
        
           		} 
        
           		pcName := replica.Template.Spec.PriorityClassName 
        
           		if len(pcName) != 0 { 
        
           			priorityClass, err := c.priorityClassLister.Get(pcName) 
        
           			if err != nil || priorityClass == nil { 
        
           				klog.Warningf("Ignore replica %q priority class %q: %v", t, pcName, err) 
        
           			} else { 
        
           				rp.priority = priorityClass.Value 
        
           			} 
        
           		} 
        
           		ro = append(ro, rp) 
        
           	} 
        
           	sort.Sort(sort.Reverse(ro)) 
        
           	minResources := corev1.ResourceList{} 
        
           	podCount := int32(0) 
        
           	for _, rp := range ro { 
        
           		if rp.Replicas == nil { 
        
           			continue 
        
           		} 
        
           		for replicaCount := int32(0); replicaCount < *rp.Replicas && podCount < minMember; replicaCount++ { 
        
           			podCount++ 
        
           			for _, c := range rp.Template.Spec.Containers { 
        
           				addResources(minResources, c.Resources) 
        
           			} 
        
           		} 
        
           	} 
        
           	return &minResources 
        
           } 
        
           func addResources(minResources corev1.ResourceList, resources corev1.ResourceRequirements) { 
        
           	if minResources == nil || cmp.Equal(resources, corev1.ResourceRequirements{}) { 
        
           		return 
        
           	} 
        
           	// If Requests is omitted for a container, 
        
           	// it defaults to Limits if that is explicitly specified. 
        
           	merged := corev1.ResourceList{} 
        
           	for name, requests := range resources.Requests { 
        
           		if limits, ok := resources.Limits[name]; ok { 
        
           			merged[name] = limits 
        
           		} else { 
        
           			merged[name] = requests 
        
           		} 
        
           	} 
        
           	for name, limits := range resources.Limits { 
        
           		if _, ok := merged[name]; !ok { 
        
           			merged[name] = limits 
        
           		} 
        
           	} 
        
           	for name, quantity := range merged { 
        
           		if q, ok := minResources[name]; !ok { 
        
           			minResources[name] = quantity.DeepCopy() 
        
           		} else { 
        
           			q.Add(quantity) 
        
           			minResources[name] = q 
        
           		} 
        
           	} 
        
           } 
        
           type replicasOrder []replicaPriority 
        
           func (p replicasOrder) Len() int { 
        
           	return len(p) 
        
           } 
        
           func (p replicasOrder) Less(i, j int) bool { 
        
           	return p[i].priority < p[j].priority 
        
           } 
        
           func (p replicasOrder) Swap(i, j int) { 
        
           	p[i], p[j] = p[j], p[i] 
        
           }

/help

The text was updated successfully, but these errors were encountered:

google-oss-prow · 2023-03-02T17:20:14Z

@tenzen-y:
This request has been marked as needing help from a contributor.

Please ensure the request meets the requirements listed here.

If this request no longer meets these requirements, the label can be removed
by commenting with the /remove-help command.

In response to this:

Since #520, the mpi-operator respects the .spec.runPolicy.schedulingPolicy when creating the PodGroup.
Currently, the mpi-operator just passes the .spec.runPolicy.schedulingPolicy.minResources to the .spec.minResources in PodGroup when using volcano as a gang scheduler.

However, we may want to calculate all required resources in Launcher and Workers considering priorityClasses and then pass it to .spec.minResources in PodGroup like the following:

mpi-operator/pkg/controller/podgroup.go

Lines 89 to 168 in 2bc2b65

func (c *MPIJobController) calcPGMinResources(

minMember int32,

replicas map[kubeflow.MPIReplicaType]*common.ReplicaSpec,

) *corev1.ResourceList {

var ro replicasOrder

for t, replica := range replicas {

rp := replicaPriority{

priority: 0,

ReplicaSpec: *replica,

}

pcName := replica.Template.Spec.PriorityClassName

if len(pcName) != 0 {

priorityClass, err := c.priorityClassLister.Get(pcName)

if err != nil || priorityClass == nil {

klog.Warningf("Ignore replica %q priority class %q: %v", t, pcName, err)

} else {

rp.priority = priorityClass.Value

}

}

ro = append(ro, rp)

}

sort.Sort(sort.Reverse(ro))

minResources := corev1.ResourceList{}

podCount := int32(0)

for _, rp := range ro {

if rp.Replicas == nil {

continue

}

for replicaCount := int32(0); replicaCount < *rp.Replicas && podCount < minMember; replicaCount++ {

podCount++

for _, c := range rp.Template.Spec.Containers {

addResources(minResources, c.Resources)

}

}

}

return &minResources

}

func addResources(minResources corev1.ResourceList, resources corev1.ResourceRequirements) {

if minResources == nil || cmp.Equal(resources, corev1.ResourceRequirements{}) {

return

}

// If Requests is omitted for a container,

// it defaults to Limits if that is explicitly specified.

merged := corev1.ResourceList{}

for name, requests := range resources.Requests {

if limits, ok := resources.Limits[name]; ok {

merged[name] = limits

} else {

merged[name] = requests

}

}

for name, limits := range resources.Limits {

if _, ok := merged[name]; !ok {

merged[name] = limits

}

}

for name, quantity := range merged {

if q, ok := minResources[name]; !ok {

minResources[name] = quantity.DeepCopy()

} else {

q.Add(quantity)

minResources[name] = q

}

}

}

type replicasOrder []replicaPriority

func (p replicasOrder) Len() int {

return len(p)

}

func (p replicasOrder) Less(i, j int) bool {

return p[i].priority < p[j].priority

}

func (p replicasOrder) Swap(i, j int) {

p[i], p[j] = p[j], p[i]

}

/help

Instructions for interacting with me using PR comments are available here. If you have questions or suggestions related to my behavior, please file an issue against the kubernetes/test-infra repository.

alculquicondor · 2023-03-02T18:10:51Z

To clarify, we are looking for help from folks that have experience with volcano.

lowang-bh · 2023-06-11T04:28:01Z

/assign

google-oss-prow bot added the help wanted Extra attention is needed label Mar 2, 2023

This was referenced Jun 8, 2023

Fix a bug that the PodGroupCtrl can not list priorityclass #561

Merged

[Help] looking for contributors who maintain volcano integration kubeflow/training-operator#1827

Closed

google-oss-prow bot assigned lowang-bh Jun 11, 2023

lowang-bh mentioned this issue Jun 11, 2023

add volcano gang-scheduler pg min resource calculation #566

Merged

google-oss-prow bot closed this as completed in #566 Jun 16, 2023

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Consider handling the minResources when using volcano as a gang scheduler #535

Consider handling the minResources when using volcano as a gang scheduler #535

tenzen-y commented Mar 2, 2023

google-oss-prow bot commented Mar 2, 2023

alculquicondor commented Mar 2, 2023

lowang-bh commented Jun 11, 2023

Consider handling the minResources when using volcano as a gang scheduler #535

Consider handling the minResources when using volcano as a gang scheduler #535

Comments

tenzen-y commented Mar 2, 2023

google-oss-prow bot commented Mar 2, 2023

alculquicondor commented Mar 2, 2023

lowang-bh commented Jun 11, 2023