Skip to content

Commit

Permalink
add e2e test for network subdomain
Browse files Browse the repository at this point in the history
Signed-off-by: vsoch <vsoch@users.noreply.github.com>
  • Loading branch information
vsoch committed Jun 2, 2023
1 parent 53c513f commit 24d731b
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 26 deletions.
4 changes: 2 additions & 2 deletions api/jobset/v1alpha1/jobset_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,13 +128,13 @@ type ReplicatedJob struct {

type Network struct {
// EnableDNSHostnames allows pods to be reached via their hostnames.
// Pods will be reachable using the fully qualified pod hostname, which is in the format:
// <jobSet.name>-<spec.replicatedJob.name>-<job-index>-<pod-index>.<jobSet.name>-<spec.replicatedJob.name>
// Pods will be reachable using the fully qualified pod hostname.
// +optional
EnableDNSHostnames *bool `json:"enableDNSHostnames,omitempty"`

// Subdomain is an explicit choice for a network subdomain name.
// When set, the replicated job is added to this network, which allows sharing common networks.
// If Subdomain is not set, we use <jobSet.name>-<spec.replicatedJob.name>.
// +optional
Subdomain string `json:"subdomain,omitempty"`
}
Expand Down
6 changes: 3 additions & 3 deletions config/components/crd/bases/jobset.x-k8s.io_jobsets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,15 @@ spec:
job.
properties:
enableDNSHostnames:
description: 'EnableDNSHostnames allows pods to be reached
description: EnableDNSHostnames allows pods to be reached
via their hostnames. Pods will be reachable using the
fully qualified pod hostname, which is in the format:
<jobSet.name>-<spec.replicatedJob.name>-<job-index>-<pod-index>.<jobSet.name>-<spec.replicatedJob.name>'
fully qualified pod hostname.
type: boolean
subdomain:
description: Subdomain is an explicit choice for a network
subdomain name When set, the replicated job is added to
this network. This would allow sharing common networks
If Subdomain is not set, we use <jobSet.name>-<spec.replicatedJob.name>
type: string
type: object
replicas:
Expand Down
5 changes: 2 additions & 3 deletions pkg/controllers/jobset_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -572,8 +572,8 @@ func constructJob(js *jobset.JobSet, rjob *jobset.ReplicatedJob, jobIdx int) (*b
labelAndAnnotateObject(&job.Spec.Template, js, rjob, jobIdx)

// If enableDNSHostnames is set, update job spec to set subdomain as
// job name (a headless service with a chosen name or the
// same name as job will be created or found later).
// the chosen subdomain name or the default name (i.e. <jobsetName>-<replicatedJobName>).
// A corresponding headless service with this name will be created later (if does not exist).
if dnsHostnamesEnabled(rjob) {
job.Spec.Template.Spec.Subdomain = GenSubdomain(js, rjob)
}
Expand Down Expand Up @@ -681,7 +681,6 @@ func genJobName(js *jobset.JobSet, rjob *jobset.ReplicatedJob, jobIndex int) str
}

func GenSubdomain(js *jobset.JobSet, rjob *jobset.ReplicatedJob) string {

// If we have selected an explicit network name, use it
if rjob.Network.Subdomain != "" {
return rjob.Network.Subdomain
Expand Down
97 changes: 79 additions & 18 deletions test/e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,25 +82,39 @@ var _ = ginkgo.Describe("JobSet", func() {
util.JobSetCompleted(ctx, k8sClient, js, timeout)
})
})
}) // end of Describe
ginkgo.When("dns hostnames is enabled with custom subdomain", func() {
ginkgo.It("should enable pods to ping each other via hostname with custom subdomain", func() {
ctx := context.Background()

// 1 replicated job with 4 replicas, DNS hostnames enabled
func pingTestJobSet(ns *corev1.Namespace) *testing.JobSetWrapper {
jsName := "js"
rjobName := "rjob"
replicas := 4
var podHostnames []string
for jobIdx := 0; jobIdx < replicas; jobIdx++ {
// Pod hostname format:
// <jobSet.name>-<spec.replicatedJob.name>-<job-index>-<pod-index>.<jobSet.name>-<spec.replicatedJob.name>
podHostnames = append(podHostnames, fmt.Sprintf("%s-%s-%d-0.%s-%s", jsName, rjobName, jobIdx, jsName, rjobName))
}
// Create JobSet.
ginkgo.By("creating jobset with subdomain")
js := pingTestJobSetSubdomain(ns).Obj()

// Verify jobset created successfully.
ginkgo.By("checking that jobset creation succeeds")
gomega.Expect(k8sClient.Create(ctx, js)).Should(gomega.Succeed())

// This bash script loops infinitely until it successfully pings all pods by hostname.
// Once successful, it sleeps for a short period to reduce flakiness, since occasionally
// all pods but one will successfully ping each other and complete before the last one
// successfully pings them all, resulting in a failed test run.
cmd := fmt.Sprintf(`for pod in {"%s","%s","%s","%s"}
// We'll need to retry getting this newly created jobset, given that creation may not immediately happen.
gomega.Eventually(k8sClient.Get(ctx, types.NamespacedName{Name: js.Name, Namespace: js.Namespace}, &jobset.JobSet{}), timeout, interval).Should(gomega.Succeed())

ginkgo.By("checking all jobs were created successfully")
gomega.Eventually(util.NumJobs, timeout, interval).WithArguments(ctx, k8sClient, js).Should(gomega.Equal(util.NumExpectedJobs(js)))

// Check jobset status if specified.
ginkgo.By("checking jobset condition")
util.JobSetCompleted(ctx, k8sClient, js, timeout)
})
})

}) // end of Describe

// getPingCommand returns the ping command for 4 hostnames.
// This bash script loops infinitely until it successfully pings all pods by hostname.
// Once successful, it sleeps for a short period to reduce flakiness, since occasionally
// all pods but one will successfully ping each other and complete before the last one
// successfully pings them all, resulting in a failed test run.
func getPingCommand(hostnames []string) string {
return fmt.Sprintf(`for pod in {"%s","%s","%s","%s"}
do
gotStatus="-1"
wantStatus="0"
Expand All @@ -115,8 +129,54 @@ do
done
echo "Successfully pinged pod: $pod"
done
sleep 30`, podHostnames[0], podHostnames[1], podHostnames[2], podHostnames[3])
sleep 30`, hostnames[0], hostnames[1], hostnames[2], hostnames[3])
}

// pingTestJobSet builds a JobSet with 1 replicated job of 4 replicas and
// DNS hostnames enabled, whose pods ping each other by their fully
// qualified default hostnames.
func pingTestJobSet(ns *corev1.Namespace) *testing.JobSetWrapper {
	const (
		jsName   = "js"
		rjobName = "rjob"
		replicas = 4
	)
	// Pod hostname format (default subdomain, i.e. no explicit Network.Subdomain):
	// <jobSet.name>-<spec.replicatedJob.name>-<job-index>-<pod-index>.<jobSet.name>-<spec.replicatedJob.name>
	podHostnames := make([]string, 0, replicas)
	for i := 0; i < replicas; i++ {
		podHostnames = append(podHostnames, fmt.Sprintf("%s-%s-%d-0.%s-%s", jsName, rjobName, i, jsName, rjobName))
	}
	pingCmd := getPingCommand(podHostnames)
	podSpec := corev1.PodSpec{
		RestartPolicy: "Never",
		Containers: []corev1.Container{
			{
				Name:    "ping-test-container",
				Image:   "bash:latest",
				Command: []string{"bash", "-c"},
				Args:    []string{pingCmd},
			},
		},
	}
	rjob := testing.MakeReplicatedJob(rjobName).
		Job(testing.MakeJobTemplate("job", ns.Name).PodSpec(podSpec).Obj()).
		Replicas(replicas).
		EnableDNSHostnames(true).
		Obj()
	return testing.MakeJobSet(jsName, ns.Name).ReplicatedJob(rjob)
}

// 1 replicated job with 4 replicas, DNS hostnames + subdomain enabled
func pingTestJobSetSubdomain(ns *corev1.Namespace) *testing.JobSetWrapper {
jsName := "js"
rjobName := "rjob"
replicas := 4
subdomain := "network-subdomain"
var podHostnames []string
for jobIdx := 0; jobIdx < replicas; jobIdx++ {
// Pod hostname format:
// e.g., js-rjob-0-0.network-subdomain.e2e-7vd7z.svc.cluster.local js-rjob-0-0
// <jobSet.name>-<spec.replicatedJob.name>-<job-index>-<pod-index>.<subdomain>
podHostnames = append(podHostnames, fmt.Sprintf("%s-%s-%d-0.%s", jsName, rjobName, jobIdx, subdomain))
}
cmd := getPingCommand(podHostnames)
return testing.MakeJobSet(jsName, ns.Name).
ReplicatedJob(testing.MakeReplicatedJob(rjobName).
Job(testing.MakeJobTemplate("job", ns.Name).
Expand All @@ -133,5 +193,6 @@ sleep 30`, podHostnames[0], podHostnames[1], podHostnames[2], podHostnames[3])
}).Obj()).
Replicas(replicas).
EnableDNSHostnames(true).
NetworkSubdomain(subdomain).
Obj())
}

0 comments on commit 24d731b

Please sign in to comment.