Support processing resource types other than GPU #75

Merged · 7 commits · Jan 15, 2019
cmd/mpi-operator/main.go (15 additions & 5 deletions)
```diff
@@ -29,11 +29,13 @@ import (
 )
 
 var (
-	masterURL            string
-	kubeConfig           string
-	gpusPerNode          int
-	kubectlDeliveryImage string
-	namespace            string
+	masterURL              string
+	kubeConfig             string
+	gpusPerNode            int
+	processingUnitsPerNode int
+	processingResourceType string
+	kubectlDeliveryImage   string
+	namespace              string
 )
 
 func main() {
```
```diff
@@ -78,6 +80,8 @@ func main() {
 		kubeInformerFactory.Batch().V1().Jobs(),
 		kubeflowInformerFactory.Kubeflow().V1alpha1().MPIJobs(),
 		gpusPerNode,
+		processingUnitsPerNode,
+		processingResourceType,
 		kubectlDeliveryImage)
 
 	go kubeInformerFactory.Start(stopCh)
```
```diff
@@ -98,4 +102,10 @@ func init() {
 		"The maximum number of GPUs available per node. Note that this will be ignored if the GPU resources are explicitly specified in the MPIJob pod spec.")
 	flag.StringVar(&kubectlDeliveryImage, "kubectl-delivery-image", "", "The container image used to deliver the kubectl binary.")
 	flag.StringVar(&namespace, "namespace", "", "The namespace used to obtain the listers.")
+	flag.IntVar(
+		&processingUnitsPerNode,
+		"processing-units-per-node",
+		1,
+		"The maximum number of processing units available per node. Note that this will be ignored if the processing resources are explicitly specified in the MPIJob pod spec.")
+	flag.StringVar(&processingResourceType, "processing-resource-type", "nvidia.com/gpu", "The compute resource name, e.g. 'nvidia.com/gpu' or 'cpu'.")
 }
```
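The review thread below mentions that the new flag is validated by a `convertProcessingResourceType()` function. As a rough sketch of what such a check could look like (the accepted set here is illustrative, taken from the flag's help text; this is not the PR's actual implementation):

```go
package main

import "fmt"

// convertProcessingResourceType sketches the validation the review
// thread refers to: reject --processing-resource-type values the
// controller does not know how to schedule. Only the two values named
// in the flag's help text are accepted in this illustration.
func convertProcessingResourceType(t string) (string, error) {
	switch t {
	case "nvidia.com/gpu", "cpu":
		return t, nil
	default:
		return "", fmt.Errorf("unsupported processing resource type: %q", t)
	}
}

func main() {
	for _, t := range []string{"cpu", "memory"} {
		if _, err := convertProcessingResourceType(t); err != nil {
			fmt.Println(err)
		} else {
			fmt.Println(t, "accepted")
		}
	}
}
```

Failing fast in `init()`/`main()` keeps a typo in the flag from surfacing later as confusing scheduling behavior.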
deploy/0-crd.yaml (31 additions & 1 deletion)
```diff
@@ -18,7 +18,7 @@ spec:
       properties:
         spec:
           title: The MPIJob spec
-          description: Either `gpus` or `replicas` should be specified, but not both
+          description: Only one of `gpus`, `processingUnits`, or `replicas` should be specified
           oneOf:
           - properties:
               gpus:
```
The second hunk adds `slotsPerWorker` to each branch and begins the new `processingUnits` branch (a review thread is attached to the `processingUnits` line):

```diff
@@ -33,13 +33,43 @@ spec:
                 - type: integer
                   multipleOf: 8
                   minimum: 8
+              slotsPerWorker:
+                title: The number of slots per worker used in hostfile
+                description: Defaults to the number of processing units per worker
+                type: integer
+                minimum: 1
             required:
             - gpus
+          - properties:
+              processingUnits:
```
Member: Should you also add a check for `processingResourceType`?

Member (Author): That's part of main.go as a flag, since I thought it was more related to the spec of the cluster where the controller gets deployed, along with `processingUnitsPerNode`. The check is done as part of `convertProcessingResourceType()`. Or maybe this is more specific to MPIJob users (if they want to specify a custom resource) and I should move it to `MPIJobSpec`?

Member: Oh right. Based on the experience with the `gpusPerNode` flag, it seems easier for users to understand if we put the options in the CRD. Maybe take them out of the operator flags and just use the job spec?

Member (Author): I'll keep those flags for now for backwards compatibility and then add fields to `MPIJobSpec` that override the flags when specified. What do you think?

Member: OK, sounds good.
The hunk continues:

```diff
+                title: Total number of processing units
+                description: Valid values are 1, 2, 4, or any multiple of 8
+                oneOf:
+                - type: integer
+                  enum:
+                  - 1
+                  - 2
+                  - 4
+                - type: integer
+                  multipleOf: 8
+                  minimum: 8
+              slotsPerWorker:
+                title: The number of slots per worker used in hostfile
+                description: Defaults to the number of processing units per worker
+                type: integer
+                minimum: 1
+            required:
+            - processingUnits
           - properties:
               replicas:
                 title: Total number of replicas
                 description: The GPU resource limit should be specified for each replica
                 type: integer
                 minimum: 1
+              slotsPerWorker:
+                title: The number of slots per worker used in hostfile
+                description: Defaults to the number of processing units per worker
+                type: integer
+                minimum: 1
             required:
             - replicas
```
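Under the extended schema, a job that requests generic processing units rather than `gpus` might look like the following. This manifest is illustrative only — the name, image, and counts are made up, not taken from the PR:

```yaml
apiVersion: kubeflow.org/v1alpha1
kind: MPIJob
metadata:
  name: example-mpijob        # illustrative name
spec:
  processingUnits: 2          # satisfies the enum branch (1, 2, or 4)
  slotsPerWorker: 1           # optional; defaults to processing units per worker
  template:
    spec:
      containers:
      - name: example
        image: example/mpi-benchmark:latest   # illustrative image
```

Because the schema uses `oneOf`, adding `gpus` or `replicas` alongside `processingUnits` would make the manifest fail validation.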
pkg/apis/kubeflow/v1alpha1/types.go (14 additions & 3 deletions)
```diff
@@ -40,16 +40,27 @@ type MPIJobList struct {
 type MPIJobSpec struct {
 	// Specifies the desired number of GPUs the MPIJob should run on.
 	// Mutually exclusive with the `Replicas` field.
+	// Note that this is deprecated in favor of `ProcessingUnits` field.
 	// +optional
 	GPUs *int32 `json:"gpus,omitempty"`
 
+	// Specifies the desired number of processing units the MPIJob should run on.
+	// Mutually exclusive with the `Replicas` field.
+	// +optional
+	ProcessingUnits *int32 `json:"processingUnits,omitempty"`
+
+	// Specifies the number of slots per worker used in hostfile.
+	// Defaults to the number of processing units per worker.
+	// +optional
+	SlotsPerWorker *int32 `json:"slotsPerWorker,omitempty"`
+
 	// Run the launcher on the master.
-	// Optional: Default to false
+	// Defaults to false.
 	// +optional
 	LauncherOnMaster bool `json:"launcherOnMaster,omitempty"`
 
 	// Specifies the number of retries before marking this job failed.
-	// Defaults to 6
+	// Defaults to 6.
 	// +optional
 	BackoffLimit *int32 `json:"backoffLimit,omitempty"`
```
Expand All @@ -61,7 +72,7 @@ type MPIJobSpec struct {

// Specifies the desired number of replicas the MPIJob should run on.
// The `PodSpec` should specify the number of GPUs.
// Mutually exclusive with the `GPUs` field.
// Mutually exclusive with the `GPUs` or `ProcessingUnits` fields.
// +optional
Replicas *int32 `json:"replicas,omitempty"`

Expand Down
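The new `SlotsPerWorker` field documents its default as "the number of processing units per worker". A minimal sketch of that defaulting rule, with hypothetical names (this is not the operator's actual code):

```go
package main

import "fmt"

// slotsPerWorker sketches the defaulting described in the field's doc
// comment: an explicit SlotsPerWorker value wins; when it is unset
// (nil), fall back to the number of processing units assigned to each
// worker. Both parameter names here are illustrative.
func slotsPerWorker(slots *int32, processingUnitsPerWorker int32) int32 {
	if slots != nil {
		return *slots
	}
	return processingUnitsPerWorker
}

func main() {
	four := int32(4)
	fmt.Println(slotsPerWorker(nil, 2))   // unset: defaults to units per worker
	fmt.Println(slotsPerWorker(&four, 2)) // explicit value wins
}
```

The same pointer-means-optional pattern is what lets the spec fields override the operator flags, as agreed in the review thread above.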
pkg/apis/kubeflow/v1alpha1/zz_generated.deepcopy.go (10 additions)

Generated deepcopy code; not rendered by default.