-
Notifications
You must be signed in to change notification settings - Fork 217
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Mount SSH Secret directly on main container #416
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,5 +24,12 @@ RUN setcap CAP_NET_BIND_SERVICE=+eip /usr/sbin/sshd | |
RUN useradd -m mpiuser | ||
WORKDIR /home/mpiuser | ||
COPY --chown=mpiuser sshd_config .sshd_config | ||
RUN sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config | ||
# Allow OpenSSH to talk to containers without asking for confirmation | ||
# by disabling StrictHostKeyChecking. | ||
# mpi-operator mounts the .ssh folder from a Secret. For that to work, we need | ||
# to disable UserKnownHostsFile to avoid write permissions. | ||
# Disabling StrictModes avoids directory and files read permission checks. | ||
RUN sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config && \ | ||
echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \ | ||
sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. apart from lines {8, 9, 32}, should we put the rest in a common image? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll do it in a followup. |
||
COPY --from=builder /pi /home/mpiuser/pi |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
PidFile /home/mpiuser/sshd.pid | ||
HostKey /home/mpiuser/.ssh/id_rsa | ||
StrictModes no |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -73,9 +73,7 @@ const ( | |
discoverHostsScriptName = "discover_hosts.sh" | ||
sshAuthSecretSuffix = "-ssh" | ||
sshAuthVolume = "ssh-auth" | ||
sshAuthMountPath = "/mnt/ssh" | ||
sshHomeInitMountPath = "/mnt/home-ssh" | ||
sshHomeVolume = "ssh-home" | ||
rootSSHPath = "/root/.ssh" | ||
launcher = "launcher" | ||
worker = "worker" | ||
launcherSuffix = "-launcher" | ||
|
@@ -242,8 +240,6 @@ type MPIJobController struct { | |
recorder record.EventRecorder | ||
// Gang scheduler name to use | ||
gangSchedulerName string | ||
// Container image used for scripting. | ||
scriptingImage string | ||
|
||
// To allow injection of updateStatus for testing. | ||
updateStatusHandler func(mpijob *kubeflow.MPIJob) error | ||
|
@@ -261,7 +257,7 @@ func NewMPIJobController( | |
podInformer coreinformers.PodInformer, | ||
podgroupsInformer podgroupsinformer.PodGroupInformer, | ||
mpiJobInformer informers.MPIJobInformer, | ||
gangSchedulerName, scriptingImage string) *MPIJobController { | ||
gangSchedulerName string) *MPIJobController { | ||
|
||
// Create event broadcaster. | ||
klog.V(4).Info("Creating event broadcaster") | ||
|
@@ -298,7 +294,6 @@ func NewMPIJobController( | |
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "MPIJobs"), | ||
recorder: recorder, | ||
gangSchedulerName: gangSchedulerName, | ||
scriptingImage: scriptingImage, | ||
} | ||
|
||
controller.updateStatusHandler = controller.doUpdateJobStatus | ||
|
@@ -1516,57 +1511,28 @@ func workerReplicas(job *kubeflow.MPIJob) int32 { | |
} | ||
|
||
func (c *MPIJobController) setupSSHOnPod(podSpec *corev1.PodSpec, job *kubeflow.MPIJob) { | ||
var mode *int32 | ||
if job.Spec.SSHAuthMountPath == rootSSHPath { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OOC, why is this part of the job spec? is the default There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, that's the default. We need the user to tell us where to put the credentials for the image to use it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. interesting, why would one want to change the ssh configuration in the container image 😅 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The folder will host the keys, not the configuration. And this location varies depending on the home directory. There is no way we can deduce this from the image. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't we just use the env variable $HOME somehow? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We wouldn't have access to it from the controller. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we need it inside the controller for anything other than setting the MountPath below? if only that, then it seems there is a way: https://kubernetes.io/docs/concepts/storage/volumes/#using-subpath-expanded-environment There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's pretty cool, but I would assume it's only possible for variables defined by the downward API (thus, known at Pod sandbox creation time, in kubelet). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, it seems so. |
||
mode = newInt32(0600) | ||
} | ||
mainContainer := &podSpec.Containers[0] | ||
podSpec.Volumes = append(podSpec.Volumes, | ||
corev1.Volume{ | ||
Name: sshAuthVolume, | ||
VolumeSource: corev1.VolumeSource{ | ||
Secret: &corev1.SecretVolumeSource{ | ||
SecretName: job.Name + sshAuthSecretSuffix, | ||
Items: sshVolumeItems, | ||
DefaultMode: mode, | ||
SecretName: job.Name + sshAuthSecretSuffix, | ||
Items: sshVolumeItems, | ||
}, | ||
}, | ||
}, | ||
corev1.Volume{ | ||
Name: sshHomeVolume, | ||
VolumeSource: corev1.VolumeSource{ | ||
EmptyDir: &corev1.EmptyDirVolumeSource{}, | ||
}, | ||
}) | ||
|
||
mainContainer := &podSpec.Containers[0] | ||
mainContainer.VolumeMounts = append(mainContainer.VolumeMounts, | ||
corev1.VolumeMount{ | ||
Name: sshHomeVolume, | ||
Name: sshAuthVolume, | ||
MountPath: job.Spec.SSHAuthMountPath, | ||
}) | ||
|
||
// The init script sets the permissions of the ssh folder in the user's home | ||
// directory. The ownership is set based on the security context of the | ||
// launcher's first container. | ||
launcherSecurityCtx := job.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.Containers[0].SecurityContext | ||
initScript := "" + | ||
"cp -RL /mnt/ssh/* /mnt/home-ssh && " + | ||
"chmod 700 /mnt/home-ssh && " + | ||
"chmod 600 /mnt/home-ssh/*" | ||
if launcherSecurityCtx != nil && launcherSecurityCtx.RunAsUser != nil { | ||
initScript += fmt.Sprintf(" && chown %d -R /mnt/home-ssh", *launcherSecurityCtx.RunAsUser) | ||
} | ||
podSpec.InitContainers = append(podSpec.InitContainers, corev1.Container{ | ||
Name: "init-ssh", | ||
Image: c.scriptingImage, | ||
VolumeMounts: []corev1.VolumeMount{ | ||
{ | ||
Name: sshAuthVolume, | ||
MountPath: sshAuthMountPath, | ||
}, | ||
{ | ||
Name: sshHomeVolume, | ||
MountPath: sshHomeInitMountPath, | ||
}, | ||
}, | ||
Command: []string{"/bin/sh"}, | ||
Args: []string{"-c", initScript}, | ||
}) | ||
} | ||
|
||
func ownerReferenceAndGVK(object metav1.Object) (*metav1.OwnerReference, schema.GroupVersionKind, error) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
update the comment to explain what each of the three options does?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done