Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Set ClusterFirstWithHostNet DNS policy when the Pods use host network. #428

Merged
merged 2 commits into from
Sep 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ GitSHA=`git rev-parse HEAD`
Date=`date "+%Y-%m-%d %H:%M:%S"`
RELEASE_VERSION?=v0.3.0
CONTROLLER_VERSION?=v2
BASE_IMAGE_SSH_PORT?=2222
IMG_BUILDER=docker
LD_FLAGS=" \
-X '${REPO_PATH}/pkg/version.GitSHA=${GitSHA}' \
Expand Down Expand Up @@ -99,7 +100,7 @@ images:

.PHONY: test_images
test_images:
${IMG_BUILDER} build -t mpioperator/base examples/base
${IMG_BUILDER} build --build-arg port=${BASE_IMAGE_SSH_PORT} -t mpioperator/base examples/base
${IMG_BUILDER} build -t mpioperator/openmpi examples/base -f examples/base/openmpi.Dockerfile
${IMG_BUILDER} build -t mpioperator/openmpi-builder examples/base -f examples/base/openmpi-builder.Dockerfile
${IMG_BUILDER} build -t mpioperator/mpi-pi:openmpi examples/pi
Expand Down
9 changes: 7 additions & 2 deletions examples/base/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
FROM debian:buster

ARG port=2222

RUN apt update && apt install -y --no-install-recommends \
openssh-server \
openssh-client \
Expand All @@ -14,11 +16,14 @@ RUN setcap CAP_NET_BIND_SERVICE=+eip /usr/sbin/sshd
# mpi-operator mounts the .ssh folder from a Secret. For that to work, we need
# to disable UserKnownHostsFile so that ssh does not need write permissions.
# Disabling StrictModes avoids the permission checks on directories and files.
RUN sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config \
RUN sed -i "s/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g" /etc/ssh/ssh_config \
&& echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config \
&& sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config
&& sed -i "s/[ #]\(.*Port \).*/ \1$port/g" /etc/ssh/ssh_config \
&& sed -i "s/#\(StrictModes \).*/\1no/g" /etc/ssh/sshd_config \
&& sed -i "s/#\(Port \).*/\1$port/g" /etc/ssh/sshd_config

RUN useradd -m mpiuser
WORKDIR /home/mpiuser
# Configurations for running sshd as non-root.
COPY --chown=mpiuser sshd_config .sshd_config
RUN echo "Port $port" >> /home/mpiuser/.sshd_config
5 changes: 1 addition & 4 deletions examples/pi/pi-intel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,6 @@ spec:
name: mpi-worker
securityContext:
runAsUser: 1000
capabilities:
add:
- NET_BIND_SERVICE
command:
args:
- /usr/sbin/sshd
Expand All @@ -49,7 +46,7 @@ spec:
- /home/mpiuser/.sshd_config
readinessProbe:
tcpSocket:
port: 22
port: 2222
initialDelaySeconds: 2
resources:
limits:
Expand Down
3 changes: 0 additions & 3 deletions examples/pi/pi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,6 @@ spec:
name: mpi-worker
securityContext:
runAsUser: 1000
capabilities:
add:
- NET_BIND_SERVICE
command:
- /usr/sbin/sshd
args:
Expand Down
10 changes: 10 additions & 0 deletions v2/pkg/controller/mpi_job_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1261,6 +1261,11 @@ func (c *MPIJobController) newWorker(mpiJob *kubeflow.MPIJob, index int) *corev1
podTemplate.Labels[common.ReplicaIndexLabel] = strconv.Itoa(index)
podTemplate.Spec.Hostname = name
podTemplate.Spec.Subdomain = mpiJob.Name + workerSuffix // Matches workers' Service name.
if podTemplate.Spec.HostNetwork {
// Allows resolution of worker hostnames without needing to include the
// namespace or cluster domain.
podTemplate.Spec.DNSPolicy = corev1.DNSClusterFirstWithHostNet
}
setRestartPolicy(podTemplate, mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker])

container := &podTemplate.Spec.Containers[0]
Expand Down Expand Up @@ -1348,6 +1353,11 @@ func (c *MPIJobController) newLauncherPodTemplate(mpiJob *kubeflow.MPIJob) corev
}
podTemplate.Spec.Hostname = launcherName
podTemplate.Spec.Subdomain = mpiJob.Name + workerSuffix // Matches workers' Service name.
if podTemplate.Spec.HostNetwork {
// Allows resolution of worker hostnames without needing to include the
// namespace or cluster domain.
podTemplate.Spec.DNSPolicy = corev1.DNSClusterFirstWithHostNet
}
container := &podTemplate.Spec.Containers[0]
container.Env = append(container.Env, launcherEnvVars...)
slotsStr := strconv.Itoa(int(*mpiJob.Spec.SlotsPerWorker))
Expand Down
6 changes: 6 additions & 0 deletions v2/pkg/controller/mpi_job_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1104,6 +1104,7 @@ func TestNewLauncherAndWorker(t *testing.T) {
Labels: map[string]string{"foo": "bar"},
},
Spec: corev1.PodSpec{
HostNetwork: true,
Containers: []corev1.Container{
{
Env: []corev1.EnvVar{
Expand All @@ -1127,6 +1128,7 @@ func TestNewLauncherAndWorker(t *testing.T) {
kubeflow.MPIReplicaTypeWorker: {
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
HostNetwork: true,
Containers: []corev1.Container{
{
Command: []string{"/entrypoint.sh"},
Expand Down Expand Up @@ -1164,6 +1166,8 @@ func TestNewLauncherAndWorker(t *testing.T) {
},
},
Spec: corev1.PodSpec{
HostNetwork: true,
DNSPolicy: corev1.DNSClusterFirstWithHostNet,
Hostname: "bar-launcher",
Subdomain: "bar-worker",
RestartPolicy: corev1.RestartPolicyOnFailure,
Expand Down Expand Up @@ -1225,6 +1229,8 @@ func TestNewLauncherAndWorker(t *testing.T) {
},
},
Spec: corev1.PodSpec{
HostNetwork: true,
DNSPolicy: corev1.DNSClusterFirstWithHostNet,
Hostname: "bar-worker-12",
Subdomain: "bar-worker",
RestartPolicy: corev1.RestartPolicyNever,
Expand Down
18 changes: 17 additions & 1 deletion v2/test/e2e/mpi_job_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,22 @@ var _ = ginkgo.Describe("MPIJob", func() {
mpiJob := createJobAndWaitForCompletion(mpiJob)
expectConditionToBeTrue(mpiJob, common.JobSucceeded)
})

ginkgo.When("running with host network", func() {
ginkgo.BeforeEach(func() {
mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.HostNetwork = true
mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template.Spec.HostNetwork = true
// The test cluster has only one node, and pods on the host network cannot
// share the same host port for sshd, so the job runs with a single worker.
mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.Containers[0].Args = []string{"/home/mpiuser/pi"}
mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Replicas = newInt32(1)
})

ginkgo.It("should succeed", func() {
mpiJob := createJobAndWaitForCompletion(mpiJob)
expectConditionToBeTrue(mpiJob, common.JobSucceeded)
})
})
})

ginkgo.When("running as non-root", func() {
Expand Down Expand Up @@ -176,7 +192,7 @@ var _ = ginkgo.Describe("MPIJob", func() {
ReadinessProbe: &corev1.Probe{
Handler: corev1.Handler{
TCPSocket: &corev1.TCPSocketAction{
Port: intstr.FromInt(22),
Port: intstr.FromInt(2222),
},
},
InitialDelaySeconds: 3,
Expand Down