Skip to content

Commit

Permalink
add support for Network.Subdomain
Browse files Browse the repository at this point in the history
Problem: A user who wants to create jobs on the same network, or on an existing
network, currently cannot, because with DNS enabled the default is to create
different networks.
Solution: Allow the user to specify a subdomain for the Network.

Signed-off-by: vsoch <vsoch@users.noreply.github.com>
  • Loading branch information
vsoch committed Jun 15, 2023
1 parent cfd9d7c commit 4ac76ff
Show file tree
Hide file tree
Showing 11 changed files with 292 additions and 117 deletions.
19 changes: 14 additions & 5 deletions api/jobset/v1alpha2/jobset_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ type JobSetSpec struct {
// +listMapKey=name
ReplicatedJobs []ReplicatedJob `json:"replicatedJobs,omitempty"`

// Network defines the networking options for the jobset.
// +kubebuilder:validation:XValidation:rule="self == oldSelf",message="Value is immutable"
// +optional
Network *Network `json:"network,omitempty"`

// SuccessPolicy configures when to declare the JobSet as
// succeeded.
// The JobSet is always declared succeeded if all jobs in the set
Expand Down Expand Up @@ -118,9 +123,7 @@ type ReplicatedJob struct {
Name string `json:"name"`
// Template defines the template of the Job that will be created.
Template batchv1.JobTemplateSpec `json:"template"`
// Network defines the networking options for the job.
// +optional
Network *Network `json:"network,omitempty"`

// Replicas is the number of jobs that will be created from this ReplicatedJob's template.
// Jobs names will be in the format: <jobSet.name>-<spec.replicatedJob.name>-<job-index>
// +kubebuilder:default=1
Expand All @@ -129,10 +132,16 @@ type ReplicatedJob struct {

// Network defines the networking options for a JobSet.
type Network struct {
// EnableDNSHostnames allows pods to be reached via their hostnames.
// Pods will be reachable using the fully qualified pod hostname, which is in the format:
// <jobSet.name>-<spec.replicatedJob.name>-<job-index>-<pod-index>.<subdomain>
// The JobSet defaulting webhook sets this to true when it is left unset.
// +optional
EnableDNSHostnames *bool `json:"enableDNSHostnames,omitempty"`

// Subdomain is an explicit choice for a network subdomain name.
// When set, any replicated job in the set is added to this network.
// A non-empty value is checked on create to be a valid DNS-1123 subdomain.
// Defaults to <jobSet.name> if not set.
// +optional
Subdomain string `json:"subdomain,omitempty"`
}

// Operator defines the target of a SuccessPolicy or FailurePolicy.
Expand Down
25 changes: 21 additions & 4 deletions api/jobset/v1alpha2/jobset_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ import (

apivalidation "k8s.io/apimachinery/pkg/api/validation"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/validation"
"k8s.io/apimachinery/pkg/util/validation/field"
"k8s.io/utils/pointer"

ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/webhook"
util "sigs.k8s.io/jobset/pkg/util/collections"
Expand Down Expand Up @@ -52,12 +54,17 @@ func (js *JobSet) Default() {
js.Spec.ReplicatedJobs[i].Template.Spec.CompletionMode = completionModePtr(batchv1.IndexedCompletion)
}
// Enable DNS hostnames by default.
if js.Spec.ReplicatedJobs[i].Network == nil {
js.Spec.ReplicatedJobs[i].Network = &Network{}
if js.Spec.Network == nil {
js.Spec.Network = &Network{}
}
if js.Spec.Network.EnableDNSHostnames == nil {
js.Spec.Network.EnableDNSHostnames = pointer.Bool(true)
}
if js.Spec.ReplicatedJobs[i].Network.EnableDNSHostnames == nil {
js.Spec.ReplicatedJobs[i].Network.EnableDNSHostnames = pointer.Bool(true)
// Subdomain defaults to the JobSet name
if js.Spec.Network.Subdomain == "" {
js.Spec.Network.Subdomain = js.Name
}

// Default pod restart policy to OnFailure.
if js.Spec.ReplicatedJobs[i].Template.Spec.Template.Spec.RestartPolicy == "" {
js.Spec.ReplicatedJobs[i].Template.Spec.Template.Spec.RestartPolicy = corev1.RestartPolicyOnFailure
Expand All @@ -74,6 +81,16 @@ func (js *JobSet) ValidateCreate() error {
var allErrs []error
// Validate that replicatedJobs listed in success policy are part of this JobSet.
validReplicatedJobs := replicatedJobNamesFromSpec(js)

// Ensure that a provided subdomain is a valid DNS name
if js.Spec.Network != nil && js.Spec.Network.Subdomain != "" {

// This can return 1 or 2 errors, validating max length and format
for _, errMessage := range validation.IsDNS1123Subdomain(js.Spec.Network.Subdomain) {
allErrs = append(allErrs, fmt.Errorf(errMessage))
}
}

for _, rjob := range js.Spec.ReplicatedJobs {
var parallelism int32 = 1
if rjob.Template.Spec.Parallelism != nil {
Expand Down
75 changes: 59 additions & 16 deletions api/jobset/v1alpha2/jobset_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/stretchr/testify/assert"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/pointer"
)

Expand Down Expand Up @@ -38,21 +39,22 @@ func TestJobSetDefaulting(t *testing.T) {
js: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Spec: batchv1.JobSpec{
Template: TestPodTemplate,
},
},
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
},
},
},
},
want: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Expand All @@ -61,7 +63,6 @@ func TestJobSetDefaulting(t *testing.T) {
CompletionMode: completionModePtr(batchv1.IndexedCompletion),
},
},
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
},
},
},
Expand All @@ -72,6 +73,7 @@ func TestJobSetDefaulting(t *testing.T) {
js: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Expand All @@ -80,14 +82,14 @@ func TestJobSetDefaulting(t *testing.T) {
CompletionMode: completionModePtr(batchv1.NonIndexedCompletion),
},
},
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
},
},
},
},
want: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Expand All @@ -96,7 +98,6 @@ func TestJobSetDefaulting(t *testing.T) {
CompletionMode: completionModePtr(batchv1.NonIndexedCompletion),
},
},
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
},
},
},
Expand All @@ -122,6 +123,50 @@ func TestJobSetDefaulting(t *testing.T) {
want: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Spec: batchv1.JobSpec{
Template: TestPodTemplate,
CompletionMode: completionModePtr(batchv1.IndexedCompletion),
},
},
},
},
},
},
},
{
name: "subdomain defaults to jobset name",
js: &JobSet{
ObjectMeta: metav1.ObjectMeta{
Name: "custom-jobset",
},
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Spec: batchv1.JobSpec{
Template: TestPodTemplate,
CompletionMode: completionModePtr(batchv1.IndexedCompletion),
},
},
},
},
},
},
want: &JobSet{
ObjectMeta: metav1.ObjectMeta{
Name: "custom-jobset",
},
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
Network: &Network{
EnableDNSHostnames: pointer.Bool(true),
Subdomain: "custom-jobset",
},
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Expand All @@ -130,7 +175,6 @@ func TestJobSetDefaulting(t *testing.T) {
CompletionMode: completionModePtr(batchv1.IndexedCompletion),
},
},
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
},
},
},
Expand All @@ -141,6 +185,7 @@ func TestJobSetDefaulting(t *testing.T) {
js: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
Network: &Network{EnableDNSHostnames: pointer.Bool(false)},
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Expand All @@ -149,14 +194,14 @@ func TestJobSetDefaulting(t *testing.T) {
CompletionMode: completionModePtr(batchv1.NonIndexedCompletion),
},
},
Network: &Network{EnableDNSHostnames: pointer.Bool(false)},
},
},
},
},
want: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
Network: &Network{EnableDNSHostnames: pointer.Bool(false)},
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Expand All @@ -165,7 +210,6 @@ func TestJobSetDefaulting(t *testing.T) {
CompletionMode: completionModePtr(batchv1.NonIndexedCompletion),
},
},
Network: &Network{EnableDNSHostnames: pointer.Bool(false)},
},
},
},
Expand All @@ -176,6 +220,7 @@ func TestJobSetDefaulting(t *testing.T) {
js: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Expand All @@ -186,14 +231,14 @@ func TestJobSetDefaulting(t *testing.T) {
CompletionMode: completionModePtr(batchv1.IndexedCompletion),
},
},
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
},
},
},
},
want: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Expand All @@ -206,7 +251,6 @@ func TestJobSetDefaulting(t *testing.T) {
CompletionMode: completionModePtr(batchv1.IndexedCompletion),
},
},
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
},
},
},
Expand All @@ -217,6 +261,7 @@ func TestJobSetDefaulting(t *testing.T) {
js: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Expand All @@ -229,14 +274,14 @@ func TestJobSetDefaulting(t *testing.T) {
CompletionMode: completionModePtr(batchv1.IndexedCompletion),
},
},
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
},
},
},
},
want: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Expand All @@ -249,7 +294,6 @@ func TestJobSetDefaulting(t *testing.T) {
CompletionMode: completionModePtr(batchv1.IndexedCompletion),
},
},
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
},
},
},
Expand All @@ -259,6 +303,7 @@ func TestJobSetDefaulting(t *testing.T) {
name: "success policy unset",
js: &JobSet{
Spec: JobSetSpec{
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Expand All @@ -271,14 +316,14 @@ func TestJobSetDefaulting(t *testing.T) {
CompletionMode: completionModePtr(batchv1.IndexedCompletion),
},
},
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
},
},
},
},
want: &JobSet{
Spec: JobSetSpec{
SuccessPolicy: defaultSuccessPolicy,
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Expand All @@ -291,7 +336,6 @@ func TestJobSetDefaulting(t *testing.T) {
CompletionMode: completionModePtr(batchv1.IndexedCompletion),
},
},
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
},
},
},
Expand All @@ -304,6 +348,7 @@ func TestJobSetDefaulting(t *testing.T) {
SuccessPolicy: &SuccessPolicy{
Operator: OperatorAny,
},
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Expand All @@ -316,7 +361,6 @@ func TestJobSetDefaulting(t *testing.T) {
CompletionMode: completionModePtr(batchv1.IndexedCompletion),
},
},
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
},
},
},
Expand All @@ -326,6 +370,7 @@ func TestJobSetDefaulting(t *testing.T) {
SuccessPolicy: &SuccessPolicy{
Operator: OperatorAny,
},
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
ReplicatedJobs: []ReplicatedJob{
{
Template: batchv1.JobTemplateSpec{
Expand All @@ -338,14 +383,12 @@ func TestJobSetDefaulting(t *testing.T) {
CompletionMode: completionModePtr(batchv1.IndexedCompletion),
},
},
Network: &Network{EnableDNSHostnames: pointer.Bool(true)},
},
},
},
},
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
tc.js.Default()
Expand Down

0 comments on commit 4ac76ff

Please sign in to comment.