From 5110db5087513801fc5946abcbd73c44e692b2c8 Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Mon, 5 Mar 2018 09:14:44 +0100 Subject: [PATCH 1/3] Lock subPath volumes Users must not be allowed to step outside the volume with subPath. Therefore the final subPath directory must be "locked" somehow and checked if it's inside volume. On Windows, we lock the directories. On Linux, we bind-mount the final subPath into /var/lib/kubelet/pods//volume-subpaths//, it can't be changed to symlink user once it's bind-mounted. --- .../cm/container_manager_linux_test.go | 12 + pkg/kubelet/container/helpers.go | 2 +- .../container/testing/fake_runtime_helper.go | 4 +- pkg/kubelet/kubelet_pods.go | 72 +- pkg/kubelet/kubelet_pods_test.go | 6 +- pkg/kubelet/kubelet_pods_windows_test.go | 4 +- .../kuberuntime/kuberuntime_container.go | 22 +- .../kuberuntime_container_linux_test.go | 8 +- .../kuberuntime/kuberuntime_manager_test.go | 2 +- pkg/kubelet/rkt/rkt.go | 2 +- pkg/kubelet/util/BUILD | 8 +- pkg/kubelet/util/util.go | 13 + pkg/kubelet/util/util_unsupported.go | 9 + .../volumemanager/reconciler/reconciler.go | 4 +- .../reconciler/reconciler_test.go | 2 +- pkg/util/mount/BUILD | 20 +- pkg/util/mount/exec_mount.go | 13 + pkg/util/mount/exec_mount_test.go | 12 + pkg/util/mount/exec_mount_unsupported.go | 13 + pkg/util/mount/fake.go | 11 + pkg/util/mount/mount.go | 59 +- pkg/util/mount/mount_linux.go | 445 ++++++ pkg/util/mount/mount_linux_test.go | 1206 +++++++++++++++++ pkg/util/mount/mount_unsupported.go | 13 + pkg/util/mount/mount_windows.go | 234 ++++ pkg/util/mount/mount_windows_test.go | 414 ++++++ pkg/util/mount/nsenter_mount.go | 48 + pkg/util/mount/nsenter_mount_test.go | 126 +- pkg/util/mount/nsenter_mount_unsupported.go | 13 + pkg/util/removeall/removeall_test.go | 12 + pkg/volume/host_path/host_path_test.go | 12 + .../operationexecutor/operation_executor.go | 7 +- .../operation_executor_test.go | 6 +- .../operationexecutor/operation_generator.go | 14 +- 34 files changed, 2775 insertions(+), 73 deletions(-) diff --git a/pkg/kubelet/cm/container_manager_linux_test.go b/pkg/kubelet/cm/container_manager_linux_test.go index c255967d73f7..3bbec062cb74 100644 --- a/pkg/kubelet/cm/container_manager_linux_test.go +++ b/pkg/kubelet/cm/container_manager_linux_test.go @@ -95,6 +95,18 @@ func (mi *fakeMountInterface) ExistsPath(pathname string) bool { return true } +func (mi *fakeMountInterface) PrepareSafeSubpath(subPath mount.Subpath) (newHostPath string, cleanupAction func(), err error) { + return "", nil, nil +} + +func (mi *fakeMountInterface) CleanSubPaths(_, _ string) error { + return nil +} + +func (mi *fakeMountInterface) SafeMakeDir(_, _ string, _ os.FileMode) error { + return nil +} + func fakeContainerMgrMountInt() mount.Interface { return &fakeMountInterface{ []mount.MountPoint{ diff --git a/pkg/kubelet/container/helpers.go b/pkg/kubelet/container/helpers.go index 2320a192be0c..529c6dfe4e5a 100644 --- a/pkg/kubelet/container/helpers.go +++ b/pkg/kubelet/container/helpers.go @@ -46,7 +46,7 @@ type HandlerRunner interface { // RuntimeHelper wraps kubelet to make container runtime // able to get necessary informations like the RunContainerOptions, DNS settings, Host IP. type RuntimeHelper interface { - GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (contOpts *RunContainerOptions, err error) + GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (contOpts *RunContainerOptions, cleanupAction func(), err error) GetPodDNS(pod *v1.Pod) (dnsConfig *runtimeapi.DNSConfig, err error) // GetPodCgroupParent returns the CgroupName identifier, and its literal cgroupfs form on the host // of a pod. diff --git a/pkg/kubelet/container/testing/fake_runtime_helper.go b/pkg/kubelet/container/testing/fake_runtime_helper.go index 373022c350e9..21001c8d29ac 100644 --- a/pkg/kubelet/container/testing/fake_runtime_helper.go +++ b/pkg/kubelet/container/testing/fake_runtime_helper.go @@ -34,12 +34,12 @@ type FakeRuntimeHelper struct { Err error } -func (f *FakeRuntimeHelper) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (*kubecontainer.RunContainerOptions, error) { +func (f *FakeRuntimeHelper) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (*kubecontainer.RunContainerOptions, func(), error) { var opts kubecontainer.RunContainerOptions if len(container.TerminationMessagePath) != 0 { opts.PodContainerDir = f.PodContainerDir } - return &opts, nil + return &opts, nil, nil } func (f *FakeRuntimeHelper) GetPodCgroupParent(pod *v1.Pod) string { diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go index 0bad08d61e4e..6698a5df87a2 100644 --- a/pkg/kubelet/kubelet_pods.go +++ b/pkg/kubelet/kubelet_pods.go @@ -61,6 +61,7 @@ import ( kubetypes "k8s.io/kubernetes/pkg/kubelet/types" "k8s.io/kubernetes/pkg/kubelet/util/format" utilfile "k8s.io/kubernetes/pkg/util/file" + mountutil "k8s.io/kubernetes/pkg/util/mount" volumeutil "k8s.io/kubernetes/pkg/volume/util" "k8s.io/kubernetes/pkg/volume/util/volumepathhandler" volumevalidation "k8s.io/kubernetes/pkg/volume/validation" @@ -160,7 +161,7 @@ func (kl *Kubelet) makeBlockVolumes(pod *v1.Pod, container *v1.Container, podVol } // makeMounts determines the mount points for the given container. -func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, hostDomain, podIP string, podVolumes kubecontainer.VolumeMap) ([]kubecontainer.Mount, error) { +func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, hostDomain, podIP string, podVolumes kubecontainer.VolumeMap, mounter mountutil.Interface) ([]kubecontainer.Mount, func(), error) { // Kubernetes only mounts on /etc/hosts if: // - container is not an infrastructure (pause) container // - container is not already mounting on /etc/hosts @@ -170,13 +171,14 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h mountEtcHostsFile := len(podIP) > 0 && runtime.GOOS != "windows" glog.V(3).Infof("container: %v/%v/%v podIP: %q creating hosts mount: %v", pod.Namespace, pod.Name, container.Name, podIP, mountEtcHostsFile) mounts := []kubecontainer.Mount{} - for _, mount := range container.VolumeMounts { + var cleanupAction func() = nil + for i, mount := range container.VolumeMounts { // do not mount /etc/hosts if container is already mounting on the path mountEtcHostsFile = mountEtcHostsFile && (mount.MountPath != etcHostsPath) vol, ok := podVolumes[mount.Name] if !ok || vol.Mounter == nil { glog.Errorf("Mount cannot be satisfied for container %q, because the volume is missing or the volume mounter is nil: %+v", container.Name, mount) - return nil, fmt.Errorf("cannot find volume %q to mount into container %q", mount.Name, container.Name) + return nil, cleanupAction, fmt.Errorf("cannot find volume %q to mount into container %q", mount.Name, container.Name) } relabelVolume := false @@ -189,25 +191,29 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h } hostPath, err := volumeutil.GetPath(vol.Mounter) if err != nil { - return nil, err + return nil, cleanupAction, err } if mount.SubPath != "" { if filepath.IsAbs(mount.SubPath) { - return nil, fmt.Errorf("error SubPath `%s` must not be an absolute path", mount.SubPath) + return nil, cleanupAction, fmt.Errorf("error SubPath `%s` must not be an absolute path", mount.SubPath) } err = volumevalidation.ValidatePathNoBacksteps(mount.SubPath) if err != nil { - return nil, fmt.Errorf("unable to provision SubPath `%s`: %v", mount.SubPath, err) + return nil, cleanupAction, fmt.Errorf("unable to provision SubPath `%s`: %v", mount.SubPath, err) } fileinfo, err := os.Lstat(hostPath) if err != nil { - return nil, err + return nil, cleanupAction, err } perm := fileinfo.Mode() - hostPath = filepath.Join(hostPath, mount.SubPath) + volumePath, err := filepath.EvalSymlinks(hostPath) + if err != nil { + return nil, cleanupAction, err + } + hostPath = filepath.Join(volumePath, mount.SubPath) if subPathExists, err := utilfile.FileOrSymlinkExists(hostPath); err != nil { glog.Errorf("Could not determine if subPath %s exists; will not attempt to change its permissions", hostPath) @@ -216,17 +222,25 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h // incorrect ownership and mode. For example, the sub path directory must have at least g+rwx // when the pod specifies an fsGroup, and if the directory is not created here, Docker will // later auto-create it with the incorrect mode 0750 - if err := os.MkdirAll(hostPath, perm); err != nil { - glog.Errorf("failed to mkdir:%s", hostPath) - return nil, err - } - - // chmod the sub path because umask may have prevented us from making the sub path with the same - // permissions as the mounter path - if err := os.Chmod(hostPath, perm); err != nil { - return nil, err + // Make extra care not to escape the volume! + if err := mounter.SafeMakeDir(hostPath, volumePath, perm); err != nil { + glog.Errorf("failed to mkdir %q: %v", hostPath, err) + return nil, cleanupAction, err } } + hostPath, cleanupAction, err = mounter.PrepareSafeSubpath(mountutil.Subpath{ + VolumeMountIndex: i, + Path: hostPath, + VolumeName: mount.Name, + VolumePath: volumePath, + PodDir: podDir, + ContainerName: container.Name, + }) + if err != nil { + // Don't pass detailed error back to the user because it could give information about host filesystem + glog.Errorf("failed to prepare subPath for volumeMount %q of container %q: %v", mount.Name, container.Name, err) + return nil, cleanupAction, fmt.Errorf("failed to prepare subPath for volumeMount %q of container %q", mount.Name, container.Name) + } } // Docker Volume Mounts fail on Windows if it is not of the form C:/ @@ -242,7 +256,7 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h propagation, err := translateMountPropagation(mount.MountPropagation) if err != nil { - return nil, err + return nil, cleanupAction, err } glog.V(5).Infof("Pod %q container %q mount %q has propagation %q", format.Pod(pod), container.Name, mount.Name, propagation) @@ -261,11 +275,11 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h hostAliases := pod.Spec.HostAliases hostsMount, err := makeHostsMount(podDir, podIP, hostName, hostDomain, hostAliases, pod.Spec.HostNetwork) if err != nil { - return nil, err + return nil, cleanupAction, err } mounts = append(mounts, *hostsMount) } - return mounts, nil + return mounts, cleanupAction, nil } // translateMountPropagation transforms v1.MountPropagationMode to @@ -437,17 +451,17 @@ func (kl *Kubelet) GetPodCgroupParent(pod *v1.Pod) string { // GenerateRunContainerOptions generates the RunContainerOptions, which can be used by // the container runtime to set parameters for launching a container. -func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (*kubecontainer.RunContainerOptions, error) { +func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (*kubecontainer.RunContainerOptions, func(), error) { opts, err := kl.containerManager.GetResources(pod, container) if err != nil { - return nil, err + return nil, nil, err } cgroupParent := kl.GetPodCgroupParent(pod) opts.CgroupParent = cgroupParent hostname, hostDomainName, err := kl.GeneratePodHostNameAndDomain(pod) if err != nil { - return nil, err + return nil, nil, err } opts.Hostname = hostname podName := volumeutil.GetUniquePodName(pod) @@ -457,7 +471,7 @@ func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Contai // TODO(random-liu): Move following convert functions into pkg/kubelet/container devices, err := kl.makeGPUDevices(pod, container) if err != nil { - return nil, err + return nil, nil, err } opts.Devices = append(opts.Devices, devices...) @@ -466,20 +480,20 @@ func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Contai blkutil := volumepathhandler.NewBlockVolumePathHandler() blkVolumes, err := kl.makeBlockVolumes(pod, container, volumes, blkutil) if err != nil { - return nil, err + return nil, nil, err } opts.Devices = append(opts.Devices, blkVolumes...) } - mounts, err := makeMounts(pod, kl.getPodDir(pod.UID), container, hostname, hostDomainName, podIP, volumes) + mounts, cleanupAction, err := makeMounts(pod, kl.getPodDir(pod.UID), container, hostname, hostDomainName, podIP, volumes, kl.mounter) if err != nil { - return nil, err + return nil, cleanupAction, err } opts.Mounts = append(opts.Mounts, mounts...) envs, err := kl.makeEnvironmentVariables(pod, container, podIP) if err != nil { - return nil, err + return nil, cleanupAction, err } opts.Envs = append(opts.Envs, envs...) @@ -499,7 +513,7 @@ func (kl *Kubelet) GenerateRunContainerOptions(pod *v1.Pod, container *v1.Contai opts.EnableHostUserNamespace = kl.enableHostUserNamespace(pod) } - return opts, nil + return opts, cleanupAction, nil } var masterServices = sets.NewString("kubernetes") diff --git a/pkg/kubelet/kubelet_pods_test.go b/pkg/kubelet/kubelet_pods_test.go index 70e561b2b82d..ee00373bf318 100644 --- a/pkg/kubelet/kubelet_pods_test.go +++ b/pkg/kubelet/kubelet_pods_test.go @@ -47,6 +47,7 @@ import ( containertest "k8s.io/kubernetes/pkg/kubelet/container/testing" "k8s.io/kubernetes/pkg/kubelet/server/portforward" "k8s.io/kubernetes/pkg/kubelet/server/remotecommand" + "k8s.io/kubernetes/pkg/util/mount" volumetest "k8s.io/kubernetes/pkg/volume/testing" ) @@ -303,6 +304,7 @@ func TestMakeMounts(t *testing.T) { for name, tc := range testCases { t.Run(name, func(t *testing.T) { + fm := &mount.FakeMounter{} pod := v1.Pod{ Spec: v1.PodSpec{ HostNetwork: true, @@ -315,7 +317,7 @@ func TestMakeMounts(t *testing.T) { return } - mounts, err := makeMounts(&pod, "/pod", &tc.container, "fakepodname", "", "", tc.podVolumes) + mounts, _, err := makeMounts(&pod, "/pod", &tc.container, "fakepodname", "", "", tc.podVolumes, fm) // validate only the error if we expect an error if tc.expectErr { @@ -338,7 +340,7 @@ func TestMakeMounts(t *testing.T) { t.Errorf("Failed to enable feature gate for MountPropagation: %v", err) return } - mounts, err = makeMounts(&pod, "/pod", &tc.container, "fakepodname", "", "", tc.podVolumes) + mounts, _, err = makeMounts(&pod, "/pod", &tc.container, "fakepodname", "", "", tc.podVolumes, fm) if !tc.expectErr { expectedPrivateMounts := []kubecontainer.Mount{} for _, mount := range tc.expectedMounts { diff --git a/pkg/kubelet/kubelet_pods_windows_test.go b/pkg/kubelet/kubelet_pods_windows_test.go index 1bc4227fca7c..cc16b358fb01 100644 --- a/pkg/kubelet/kubelet_pods_windows_test.go +++ b/pkg/kubelet/kubelet_pods_windows_test.go @@ -24,6 +24,7 @@ import ( "github.com/stretchr/testify/assert" "k8s.io/api/core/v1" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" + "k8s.io/kubernetes/pkg/util/mount" ) func TestMakeMountsWindows(t *testing.T) { @@ -64,7 +65,8 @@ func TestMakeMountsWindows(t *testing.T) { }, } - mounts, _ := makeMounts(&pod, "/pod", &container, "fakepodname", "", "", podVolumes) + fm := &mount.FakeMounter{} + mounts, _, _ := makeMounts(&pod, "/pod", &container, "fakepodname", "", "", podVolumes, fm) expectedMounts := []kubecontainer.Mount{ { diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container.go b/pkg/kubelet/kuberuntime/kuberuntime_container.go index 2dadd7a66e63..ec9f29535e31 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container.go @@ -106,11 +106,15 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb restartCount = containerStatus.RestartCount + 1 } - containerConfig, err := m.generateContainerConfig(container, pod, restartCount, podIP, imageRef, containerType) + containerConfig, cleanupAction, err := m.generateContainerConfig(container, pod, restartCount, podIP, imageRef, containerType) + if cleanupAction != nil { + defer cleanupAction() + } if err != nil { m.recordContainerEvent(pod, container, "", v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", grpc.ErrorDesc(err)) return grpc.ErrorDesc(err), ErrCreateContainerConfig } + containerID, err := m.runtimeService.CreateContainer(podSandboxID, containerConfig, podSandboxConfig) if err != nil { m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToCreateContainer, "Error: %v", grpc.ErrorDesc(err)) @@ -172,27 +176,27 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb } // generateContainerConfig generates container config for kubelet runtime v1. -func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Container, pod *v1.Pod, restartCount int, podIP, imageRef string, containerType kubecontainer.ContainerType) (*runtimeapi.ContainerConfig, error) { - opts, err := m.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP) +func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Container, pod *v1.Pod, restartCount int, podIP, imageRef string, containerType kubecontainer.ContainerType) (*runtimeapi.ContainerConfig, func(), error) { + opts, cleanupAction, err := m.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP) if err != nil { - return nil, err + return nil, nil, err } uid, username, err := m.getImageUser(container.Image) if err != nil { - return nil, err + return nil, cleanupAction, err } // Verify RunAsNonRoot. Non-root verification only supports numeric user. if err := verifyRunAsNonRoot(pod, container, uid, username); err != nil { - return nil, err + return nil, cleanupAction, err } command, args := kubecontainer.ExpandContainerCommandAndArgs(container, opts.Envs) logDir := BuildContainerLogsDirectory(kubetypes.UID(pod.UID), container.Name) err = m.osInterface.MkdirAll(logDir, 0755) if err != nil { - return nil, fmt.Errorf("create container log directory for container %s failed: %v", container.Name, err) + return nil, cleanupAction, fmt.Errorf("create container log directory for container %s failed: %v", container.Name, err) } containerLogsPath := buildContainerLogsPath(container.Name, restartCount) restartCountUint32 := uint32(restartCount) @@ -217,7 +221,7 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Contai // set platform specific configurations. if err := m.applyPlatformSpecificContainerConfig(config, container, pod, uid, username); err != nil { - return nil, err + return nil, cleanupAction, err } // set environment variables @@ -231,7 +235,7 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(container *v1.Contai } config.Envs = envs - return config, nil + return config, cleanupAction, nil } // makeDevices generates container devices for kubelet runtime v1. diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container_linux_test.go b/pkg/kubelet/kuberuntime/kuberuntime_container_linux_test.go index 7bd9aafac834..3ab8803129b2 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container_linux_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container_linux_test.go @@ -32,7 +32,7 @@ func makeExpectedConfig(m *kubeGenericRuntimeManager, pod *v1.Pod, containerInde container := &pod.Spec.Containers[containerIndex] podIP := "" restartCount := 0 - opts, _ := m.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP) + opts, _, _ := m.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP) containerLogsPath := buildContainerLogsPath(container.Name, restartCount) restartCountUint32 := uint32(restartCount) envs := make([]*runtimeapi.KeyValue, len(opts.Envs)) @@ -84,7 +84,7 @@ func TestGenerateContainerConfig(t *testing.T) { } expectedConfig := makeExpectedConfig(m, pod, 0) - containerConfig, err := m.generateContainerConfig(&pod.Spec.Containers[0], pod, 0, "", pod.Spec.Containers[0].Image, kubecontainer.ContainerTypeRegular) + containerConfig, _, err := m.generateContainerConfig(&pod.Spec.Containers[0], pod, 0, "", pod.Spec.Containers[0].Image, kubecontainer.ContainerTypeRegular) assert.NoError(t, err) assert.Equal(t, expectedConfig, containerConfig, "generate container config for kubelet runtime v1.") @@ -113,7 +113,7 @@ func TestGenerateContainerConfig(t *testing.T) { }, } - _, err = m.generateContainerConfig(&podWithContainerSecurityContext.Spec.Containers[0], podWithContainerSecurityContext, 0, "", podWithContainerSecurityContext.Spec.Containers[0].Image, kubecontainer.ContainerTypeRegular) + _, _, err = m.generateContainerConfig(&podWithContainerSecurityContext.Spec.Containers[0], podWithContainerSecurityContext, 0, "", podWithContainerSecurityContext.Spec.Containers[0].Image, kubecontainer.ContainerTypeRegular) assert.Error(t, err) imageId, _ := imageService.PullImage(&runtimeapi.ImageSpec{Image: "busybox"}, nil) @@ -125,6 +125,6 @@ func TestGenerateContainerConfig(t *testing.T) { podWithContainerSecurityContext.Spec.Containers[0].SecurityContext.RunAsUser = nil podWithContainerSecurityContext.Spec.Containers[0].SecurityContext.RunAsNonRoot = &runAsNonRootTrue - _, err = m.generateContainerConfig(&podWithContainerSecurityContext.Spec.Containers[0], podWithContainerSecurityContext, 0, "", podWithContainerSecurityContext.Spec.Containers[0].Image, kubecontainer.ContainerTypeRegular) + _, _, err = m.generateContainerConfig(&podWithContainerSecurityContext.Spec.Containers[0], podWithContainerSecurityContext, 0, "", podWithContainerSecurityContext.Spec.Containers[0].Image, kubecontainer.ContainerTypeRegular) assert.Error(t, err, "RunAsNonRoot should fail for non-numeric username") } diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go index f39d2f394215..b1de84e1c385 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go @@ -143,7 +143,7 @@ func makeFakeContainer(t *testing.T, m *kubeGenericRuntimeManager, template cont sandboxConfig, err := m.generatePodSandboxConfig(template.pod, template.sandboxAttempt) assert.NoError(t, err, "generatePodSandboxConfig for container template %+v", template) - containerConfig, err := m.generateContainerConfig(template.container, template.pod, template.attempt, "", template.container.Image, template.containerType) + containerConfig, _, err := m.generateContainerConfig(template.container, template.pod, template.attempt, "", template.container.Image, template.containerType) assert.NoError(t, err, "generateContainerConfig for container template %+v", template) podSandboxID := apitest.BuildSandboxName(sandboxConfig.Metadata) diff --git a/pkg/kubelet/rkt/rkt.go b/pkg/kubelet/rkt/rkt.go index b35288b4aab8..b15edf2c10f6 100644 --- a/pkg/kubelet/rkt/rkt.go +++ b/pkg/kubelet/rkt/rkt.go @@ -842,7 +842,7 @@ func (r *Runtime) newAppcRuntimeApp(pod *v1.Pod, podIP string, c v1.Container, r } // TODO: determine how this should be handled for rkt - opts, err := r.runtimeHelper.GenerateRunContainerOptions(pod, &c, podIP) + opts, _, err := r.runtimeHelper.GenerateRunContainerOptions(pod, &c, podIP) if err != nil { return err } diff --git a/pkg/kubelet/util/BUILD b/pkg/kubelet/util/BUILD index 261a3d44acd1..e76e870bb346 100644 --- a/pkg/kubelet/util/BUILD +++ b/pkg/kubelet/util/BUILD @@ -8,9 +8,13 @@ load( go_test( name = "go_default_test", - srcs = ["util_test.go"], + srcs = [ + "util_test.go", + ], embed = [":go_default_library"], - deps = ["//vendor/github.com/stretchr/testify/assert:go_default_library"], + deps = [ + "//vendor/github.com/stretchr/testify/assert:go_default_library", + ], ) go_library( diff --git a/pkg/kubelet/util/util.go b/pkg/kubelet/util/util.go index eb7cf142754b..cb70c07f86f7 100644 --- a/pkg/kubelet/util/util.go +++ b/pkg/kubelet/util/util.go @@ -19,6 +19,8 @@ package util import ( "fmt" "net/url" + "path/filepath" + "strings" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -45,3 +47,14 @@ func parseEndpoint(endpoint string) (string, string, error) { return u.Scheme, "", fmt.Errorf("protocol %q not supported", u.Scheme) } } + +func pathWithinBase(fullPath, basePath string) bool { + rel, err := filepath.Rel(basePath, fullPath) + if err != nil { + return false + } + if strings.HasPrefix(rel, "..") { + return false + } + return true +} diff --git a/pkg/kubelet/util/util_unsupported.go b/pkg/kubelet/util/util_unsupported.go index 5fd2e9c6896e..77f14ea5255a 100644 --- a/pkg/kubelet/util/util_unsupported.go +++ b/pkg/kubelet/util/util_unsupported.go @@ -31,3 +31,12 @@ func CreateListener(endpoint string) (net.Listener, error) { func GetAddressAndDialer(endpoint string) (string, func(addr string, timeout time.Duration) (net.Conn, error), error) { return "", nil, fmt.Errorf("GetAddressAndDialer is unsupported in this build") } + +// LockAndCheckSubPath empty implementation +func LockAndCheckSubPath(volumePath, subPath string) ([]uintptr, error) { + return []uintptr{}, nil +} + +// UnlockPath empty implementation +func UnlockPath(fileHandles []uintptr) { +} diff --git a/pkg/kubelet/volumemanager/reconciler/reconciler.go b/pkg/kubelet/volumemanager/reconciler/reconciler.go index 4cec404ac3ea..a1266a9e1822 100644 --- a/pkg/kubelet/volumemanager/reconciler/reconciler.go +++ b/pkg/kubelet/volumemanager/reconciler/reconciler.go @@ -169,7 +169,7 @@ func (rc *reconciler) reconcile() { // Volume is mounted, unmount it glog.V(5).Infof(mountedVolume.GenerateMsgDetailed("Starting operationExecutor.UnmountVolume", "")) err := rc.operationExecutor.UnmountVolume( - mountedVolume.MountedVolume, rc.actualStateOfWorld) + mountedVolume.MountedVolume, rc.actualStateOfWorld, rc.kubeletPodsDir) if err != nil && !nestedpendingoperations.IsAlreadyExists(err) && !exponentialbackoff.IsExponentialBackoff(err) { @@ -401,7 +401,7 @@ func (rc *reconciler) cleanupMounts(volume podVolume) { } // TODO: Currently cleanupMounts only includes UnmountVolume operation. In the next PR, we will add // to unmount both volume and device in the same routine. - err := rc.operationExecutor.UnmountVolume(mountedVolume, rc.actualStateOfWorld) + err := rc.operationExecutor.UnmountVolume(mountedVolume, rc.actualStateOfWorld, rc.kubeletPodsDir) if err != nil { glog.Errorf(mountedVolume.GenerateErrorDetailed(fmt.Sprintf("volumeHandler.UnmountVolumeHandler for UnmountVolume failed"), err).Error()) return diff --git a/pkg/kubelet/volumemanager/reconciler/reconciler_test.go b/pkg/kubelet/volumemanager/reconciler/reconciler_test.go index 0c6a20af6e11..4e49d2ba7e15 100644 --- a/pkg/kubelet/volumemanager/reconciler/reconciler_test.go +++ b/pkg/kubelet/volumemanager/reconciler/reconciler_test.go @@ -908,7 +908,7 @@ func Test_GenerateUnmapVolumeFunc_Plugin_Not_Found(t *testing.T) { volumeToUnmount := operationexecutor.MountedVolume{ PluginName: "fake-file-plugin", VolumeSpec: tmpSpec} - err := oex.UnmountVolume(volumeToUnmount, asw) + err := oex.UnmountVolume(volumeToUnmount, asw, "" /* podsDir */) // Assert if assert.Error(t, err) { assert.Contains(t, err.Error(), tc.expectedErrMsg) diff --git a/pkg/util/mount/BUILD b/pkg/util/mount/BUILD index 92966a59f7fa..3b2a78a5651a 100644 --- a/pkg/util/mount/BUILD +++ b/pkg/util/mount/BUILD @@ -1,10 +1,4 @@ -package(default_visibility = ["//visibility:public"]) - -load( - "@io_bazel_rules_go//go:def.bzl", - "go_library", - "go_test", -) +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "go_default_library", @@ -72,6 +66,7 @@ go_library( "//conditions:default": [], }), importpath = "k8s.io/kubernetes/pkg/util/mount", + visibility = ["//visibility:public"], deps = [ "//vendor/github.com/golang/glog:go_default_library", "//vendor/k8s.io/utils/exec:go_default_library", @@ -104,7 +99,15 @@ go_test( embed = [":go_default_library"], deps = [ "//vendor/k8s.io/utils/exec/testing:go_default_library", - ], + ] + select({ + "@io_bazel_rules_go//go/platform:linux": [ + "//vendor/github.com/golang/glog:go_default_library", + ], + "@io_bazel_rules_go//go/platform:windows": [ + "//vendor/github.com/stretchr/testify/assert:go_default_library", + ], + "//conditions:default": [], + }), ) filegroup( @@ -118,4 +121,5 @@ filegroup( name = "all-srcs", srcs = [":package-srcs"], tags = ["automanaged"], + visibility = ["//visibility:public"], ) diff --git a/pkg/util/mount/exec_mount.go b/pkg/util/mount/exec_mount.go index b12a2be38a55..574174af853a 100644 --- a/pkg/util/mount/exec_mount.go +++ b/pkg/util/mount/exec_mount.go @@ -20,6 +20,7 @@ package mount import ( "fmt" + "os" "github.com/golang/glog" ) @@ -138,3 +139,15 @@ func (m *execMounter) MakeDir(pathname string) error { func (m *execMounter) ExistsPath(pathname string) bool { return m.wrappedMounter.ExistsPath(pathname) } + +func (m *execMounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) { + return m.wrappedMounter.PrepareSafeSubpath(subPath) +} + +func (m *execMounter) CleanSubPaths(podDir string, volumeName string) error { + return m.wrappedMounter.CleanSubPaths(podDir, volumeName) +} + +func (m *execMounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error { + return m.wrappedMounter.SafeMakeDir(pathname, base, perm) +} diff --git a/pkg/util/mount/exec_mount_test.go b/pkg/util/mount/exec_mount_test.go index 5882477f71e2..248f49631bfd 100644 --- a/pkg/util/mount/exec_mount_test.go +++ b/pkg/util/mount/exec_mount_test.go @@ -20,6 +20,7 @@ package mount import ( "fmt" + "os" "reflect" "strings" "testing" @@ -151,3 +152,14 @@ func (fm *fakeMounter) ExistsPath(pathname string) bool { func (fm *fakeMounter) GetFileType(pathname string) (FileType, error) { return FileTypeFile, nil } +func (fm *fakeMounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) { + return subPath.Path, nil, nil +} + +func (fm *fakeMounter) CleanSubPaths(podDir string, volumeName string) error { + return nil +} + +func (fm *fakeMounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error { + return nil +} diff --git a/pkg/util/mount/exec_mount_unsupported.go b/pkg/util/mount/exec_mount_unsupported.go index 136704b23e23..666a57063e00 100644 --- a/pkg/util/mount/exec_mount_unsupported.go +++ b/pkg/util/mount/exec_mount_unsupported.go @@ -20,6 +20,7 @@ package mount import ( "errors" + "os" ) type execMounter struct{} @@ -85,3 +86,15 @@ func (mounter *execMounter) MakeFile(pathname string) error { func (mounter *execMounter) ExistsPath(pathname string) bool { return true } + +func (mounter *execMounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) { + return subPath.Path, nil, nil +} + +func (mounter *execMounter) CleanSubPaths(podDir string, volumeName string) error { + return nil +} + +func (mounter *execMounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error { + return nil +} diff --git a/pkg/util/mount/fake.go b/pkg/util/mount/fake.go index f4e2e411de1f..4bc32ff21cfe 100644 --- a/pkg/util/mount/fake.go +++ b/pkg/util/mount/fake.go @@ -197,3 +197,14 @@ func (f *FakeMounter) MakeFile(pathname string) error { func (f *FakeMounter) ExistsPath(pathname string) bool { return false } + +func (f *FakeMounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) { + return subPath.Path, nil, nil +} + +func (f *FakeMounter) CleanSubPaths(podDir string, volumeName string) error { + return nil +} +func (mounter *FakeMounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error { + return nil +} diff --git a/pkg/util/mount/mount.go b/pkg/util/mount/mount.go index 0f9bac03fb79..f241679b565c 100644 --- a/pkg/util/mount/mount.go +++ b/pkg/util/mount/mount.go @@ -22,6 +22,7 @@ import ( "os" "path/filepath" "strings" + "syscall" ) type FileType string @@ -83,9 +84,45 @@ type Interface interface { // MakeDir creates a new directory. // Will operate in the host mount namespace if kubelet is running in a container MakeDir(pathname string) error + // SafeMakeDir makes sure that the created directory does not escape given + // base directory mis-using symlinks. The directory is created in the same + // mount namespace as where kubelet is running. Note that the function makes + // sure that it creates the directory somewhere under the base, nothing + // else. E.g. if the directory already exists, it may exists outside of the + // base due to symlinks. + SafeMakeDir(pathname string, base string, perm os.FileMode) error // ExistsPath checks whether the path exists. // Will operate in the host mount namespace if kubelet is running in a container ExistsPath(pathname string) bool + // CleanSubPaths removes any bind-mounts created by PrepareSafeSubpath in given + // pod volume directory. + CleanSubPaths(podDir string, volumeName string) error + // PrepareSafeSubpath does everything that's necessary to prepare a subPath + // that's 1) inside given volumePath and 2) immutable after this call. + // + // newHostPath - location of prepared subPath. It should be used instead of + // hostName when running the container. + // cleanupAction - action to run when the container is running or it failed to start. + // + // CleanupAction must be called immediately after the container with given + // subpath starts. On the other hand, Interface.CleanSubPaths must be called + // when the pod finishes. + PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) +} + +type Subpath struct { + // index of the VolumeMount for this container + VolumeMountIndex int + // Full path to the subpath directory on the host + Path string + // name of the volume that is a valid directory name. + VolumeName string + // Full path to the volume path + VolumePath string + // Path to the pod's directory, including pod UID + PodDir string + // Name of the container + ContainerName string } // Exec executes command where mount utilities are. This can be either the host, @@ -195,6 +232,13 @@ func GetDeviceNameFromMount(mounter Interface, mountPath string) (string, int, e return device, refCount, nil } +func isNotDirErr(err error) bool { + if e, ok := err.(*os.PathError); ok && e.Err == syscall.ENOTDIR { + return true + } + return false +} + // IsNotMountPoint determines if a directory is a mountpoint. // It should return ErrNotExist when the directory does not exist. // This method uses the List() of all mountpoints @@ -210,7 +254,7 @@ func IsNotMountPoint(mounter Interface, file string) (bool, error) { notMnt = true notMntErr = nil } - if notMntErr != nil { + if notMntErr != nil && isNotDirErr(notMntErr) { return notMnt, notMntErr } // identified as mountpoint, so return this fact @@ -274,3 +318,16 @@ func HasMountRefs(mountPath string, mountRefs []string) bool { } return count > 0 } + +// pathWithinBase checks if give path is within given base directory. +func pathWithinBase(fullPath, basePath string) bool { + rel, err := filepath.Rel(basePath, fullPath) + if err != nil { + return false + } + if strings.HasPrefix(rel, "..") { + // Needed to escape the base path + return false + } + return true +} diff --git a/pkg/util/mount/mount_linux.go b/pkg/util/mount/mount_linux.go index 11835432d7aa..d45123076208 100644 --- a/pkg/util/mount/mount_linux.go +++ b/pkg/util/mount/mount_linux.go @@ -21,6 +21,7 @@ package mount import ( "errors" "fmt" + "io/ioutil" "os" "os/exec" "path" @@ -49,6 +50,11 @@ const ( fsckErrorsCorrected = 1 // 'fsck' found errors but exited without correcting them fsckErrorsUncorrected = 4 + + // place for subpath mounts + containerSubPathDirectoryName = "volume-subpaths" + // syscall.Openat flags used to traverse directories not following symlinks + nofollowFlags = syscall.O_RDONLY | syscall.O_NOFOLLOW ) // Mounter provides the default implementation of mount.Interface @@ -636,6 +642,7 @@ func isShared(path string, filename string) (bool, error) { } type mountInfo struct { + // Path of the mount point mountPoint string // list of "optional parameters", mount propagation is one of them optional []string @@ -655,6 +662,7 @@ func parseMountInfo(filename string) ([]mountInfo, error) { // the last split() item is empty string following the last \n continue } + // See `man proc` for authoritative description of format of the file. fields := strings.Fields(line) if len(fields) < 7 { return nil, fmt.Errorf("wrong number of fields in (expected %d, got %d): %s", 8, len(fields), line) @@ -663,6 +671,7 @@ func parseMountInfo(filename string) ([]mountInfo, error) { mountPoint: fields[4], optional: []string{}, } + // All fields until "-" are "optional fields". for i := 6; i < len(fields) && fields[i] != "-"; i++ { info.optional = append(info.optional, fields[i]) } @@ -698,3 +707,439 @@ func doMakeRShared(path string, mountInfoFilename string) error { return nil } + +func (mounter *Mounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) { + newHostPath, err = doBindSubPath(mounter, subPath, os.Getpid()) + // There is no action when the container starts. Bind-mount will be cleaned + // when container stops by CleanSubPaths. + cleanupAction = nil + return newHostPath, cleanupAction, err +} + +// This implementation is shared between Linux and NsEnterMounter +// kubeletPid is PID of kubelet in the PID namespace where bind-mount is done, +// i.e. pid on the *host* if kubelet runs in a container. +func doBindSubPath(mounter Interface, subpath Subpath, kubeletPid int) (hostPath string, err error) { + // Check early for symlink. This is just a pre-check to avoid bind-mount + // before the final check. + evalSubPath, err := filepath.EvalSymlinks(subpath.Path) + if err != nil { + return "", fmt.Errorf("evalSymlinks %q failed: %v", subpath.Path, err) + } + glog.V(5).Infof("doBindSubPath %q, full subpath %q for volumepath %q", subpath.Path, evalSubPath, subpath.VolumePath) + + evalSubPath = filepath.Clean(evalSubPath) + if !pathWithinBase(evalSubPath, subpath.VolumePath) { + return "", fmt.Errorf("subpath %q not within volume path %q", evalSubPath, subpath.VolumePath) + } + + // Prepare directory for bind mounts + // containerName is DNS label, i.e. safe as a directory name. + bindDir := filepath.Join(subpath.PodDir, containerSubPathDirectoryName, subpath.VolumeName, subpath.ContainerName) + err = os.MkdirAll(bindDir, 0750) + if err != nil && !os.IsExist(err) { + return "", fmt.Errorf("error creating directory %s: %s", bindDir, err) + } + bindPathTarget := filepath.Join(bindDir, strconv.Itoa(subpath.VolumeMountIndex)) + + success := false + defer func() { + // Cleanup subpath on error + if !success { + glog.V(4).Infof("doBindSubPath() failed for %q, cleaning up subpath", bindPathTarget) + if cleanErr := cleanSubPath(mounter, subpath); cleanErr != nil { + glog.Errorf("Failed to clean subpath %q: %v", bindPathTarget, cleanErr) + } + } + }() + + // Check it's not already bind-mounted + notMount, err := IsNotMountPoint(mounter, bindPathTarget) + if err != nil { + if !os.IsNotExist(err) { + return "", fmt.Errorf("error checking path %s for mount: %s", bindPathTarget, err) + } + // Ignore ErrorNotExist: the file/directory will be created below if it does not exist yet. + notMount = true + } + if !notMount { + // It's already mounted + glog.V(5).Infof("Skipping bind-mounting subpath %s: already mounted", bindPathTarget) + success = true + return bindPathTarget, nil + } + + // Create target of the bind mount. A directory for directories, empty file + // for everything else. + t, err := os.Lstat(subpath.Path) + if err != nil { + return "", fmt.Errorf("lstat %s failed: %s", subpath.Path, err) + } + if t.Mode()&os.ModeDir > 0 { + if err = os.Mkdir(bindPathTarget, 0750); err != nil && !os.IsExist(err) { + return "", fmt.Errorf("error creating directory %s: %s", bindPathTarget, err) + } + } else { + // "/bin/touch ". + // A file is enough for all possible targets (symlink, device, pipe, + // socket, ...), bind-mounting them into a file correctly changes type + // of the target file. + if err = ioutil.WriteFile(bindPathTarget, []byte{}, 0640); err != nil { + return "", fmt.Errorf("error creating file %s: %s", bindPathTarget, err) + } + } + + // Safe open subpath and get the fd + fd, err := doSafeOpen(evalSubPath, subpath.VolumePath) + if err != nil { + return "", fmt.Errorf("error opening subpath %v: %v", evalSubPath, err) + } + defer syscall.Close(fd) + + mountSource := fmt.Sprintf("/proc/%d/fd/%v", kubeletPid, fd) + + // Do the bind mount + glog.V(5).Infof("bind mounting %q at %q", mountSource, bindPathTarget) + if err = mounter.Mount(mountSource, bindPathTarget, "" /*fstype*/, []string{"bind"}); err != nil { + return "", fmt.Errorf("error mounting %s: %s", subpath.Path, err) + } + + success = true + glog.V(3).Infof("Bound SubPath %s into %s", subpath.Path, bindPathTarget) + return bindPathTarget, nil +} + +func (mounter *Mounter) CleanSubPaths(podDir string, volumeName string) error { + return doCleanSubPaths(mounter, podDir, volumeName) +} + +// This implementation is shared between Linux and NsEnterMounter +func doCleanSubPaths(mounter Interface, podDir string, volumeName string) error { + glog.V(4).Infof("Cleaning up subpath mounts for %s", podDir) + // scan /var/lib/kubelet/pods//volume-subpaths//* + subPathDir := filepath.Join(podDir, containerSubPathDirectoryName, volumeName) + containerDirs, err := ioutil.ReadDir(subPathDir) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return fmt.Errorf("error reading %s: %s", subPathDir, err) + } + + for _, containerDir := range containerDirs { + if !containerDir.IsDir() { + glog.V(4).Infof("Container file is not a directory: %s", containerDir.Name()) + continue + } + glog.V(4).Infof("Cleaning up subpath mounts for container %s", containerDir.Name()) + + // scan /var/lib/kubelet/pods//volume-subpaths///* + fullContainerDirPath := filepath.Join(subPathDir, containerDir.Name()) + subPaths, err := ioutil.ReadDir(fullContainerDirPath) + if err != nil { + return fmt.Errorf("error reading %s: %s", fullContainerDirPath, err) + } + for _, subPath := range subPaths { + if err = doCleanSubPath(mounter, fullContainerDirPath, subPath.Name()); err != nil { + return err + } + } + // Whole container has been processed, remove its directory. + if err := os.Remove(fullContainerDirPath); err != nil { + return fmt.Errorf("error deleting %s: %s", fullContainerDirPath, err) + } + glog.V(5).Infof("Removed %s", fullContainerDirPath) + } + // Whole pod volume subpaths have been cleaned up, remove its subpath directory. + if err := os.Remove(subPathDir); err != nil { + return fmt.Errorf("error deleting %s: %s", subPathDir, err) + } + glog.V(5).Infof("Removed %s", subPathDir) + + // Remove entire subpath directory if it's the last one + podSubPathDir := filepath.Join(podDir, containerSubPathDirectoryName) + if err := os.Remove(podSubPathDir); err != nil && !os.IsExist(err) { + return fmt.Errorf("error deleting %s: %s", podSubPathDir, err) + } + glog.V(5).Infof("Removed %s", podSubPathDir) + return nil +} + +// doCleanSubPath tears down the single subpath bind mount +func doCleanSubPath(mounter Interface, fullContainerDirPath, subPathIndex string) error { + // process /var/lib/kubelet/pods//volume-subpaths/// + glog.V(4).Infof("Cleaning up subpath mounts for subpath %v", subPathIndex) + fullSubPath := filepath.Join(fullContainerDirPath, subPathIndex) + notMnt, err := IsNotMountPoint(mounter, fullSubPath) + if err != nil { + return fmt.Errorf("error checking %s for mount: %s", fullSubPath, err) + } + // Unmount it + if !notMnt { + if err = mounter.Unmount(fullSubPath); err != nil { + return fmt.Errorf("error unmounting %s: %s", fullSubPath, err) + } + glog.V(5).Infof("Unmounted %s", fullSubPath) + } + // Remove it *non*-recursively, just in case there were some hiccups. + if err = os.Remove(fullSubPath); err != nil { + return fmt.Errorf("error deleting %s: %s", fullSubPath, err) + } + glog.V(5).Infof("Removed %s", fullSubPath) + return nil +} + +// cleanSubPath will teardown the subpath bind mount and any remove any directories if empty +func cleanSubPath(mounter Interface, subpath Subpath) error { + containerDir := filepath.Join(subpath.PodDir, containerSubPathDirectoryName, subpath.VolumeName, subpath.ContainerName) + + // Clean subdir bindmount + if err := doCleanSubPath(mounter, containerDir, strconv.Itoa(subpath.VolumeMountIndex)); err != nil && !os.IsNotExist(err) { + return err + } + + // Recusively remove directories if empty + if err := removeEmptyDirs(subpath.PodDir, containerDir); err != nil { + return err + } + + return nil +} + +// removeEmptyDirs works backwards from endDir to baseDir and removes each directory +// if it is empty. It stops once it encounters a directory that has content +func removeEmptyDirs(baseDir, endDir string) error { + if !pathWithinBase(endDir, baseDir) { + return fmt.Errorf("endDir %q is not within baseDir %q", endDir, baseDir) + } + + for curDir := endDir; curDir != baseDir; curDir = filepath.Dir(curDir) { + s, err := os.Stat(curDir) + if err != nil { + if os.IsNotExist(err) { + glog.V(5).Infof("curDir %q doesn't exist, skipping", curDir) + continue + } + return fmt.Errorf("error stat %q: %v", curDir, err) + } + if !s.IsDir() { + return fmt.Errorf("path %q not a directory", curDir) + } + + err = os.Remove(curDir) + if os.IsExist(err) { + glog.V(5).Infof("Directory %q not empty, not removing", curDir) + break + } else if err != nil { + return fmt.Errorf("error removing directory %q: %v", curDir, err) + } + glog.V(5).Infof("Removed directory %q", curDir) + } + return nil +} + +func (mounter *Mounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error { + return doSafeMakeDir(pathname, base, perm) +} + +// This implementation is shared between Linux and NsEnterMounter +func doSafeMakeDir(pathname string, base string, perm os.FileMode) error { + glog.V(4).Infof("Creating directory %q within base %q", pathname, base) + + if !pathWithinBase(pathname, base) { + return fmt.Errorf("path %s is outside of allowed base %s", pathname, base) + } + + // Quick check if the directory already exists + s, err := os.Stat(pathname) + if err == nil { + // Path exists + if s.IsDir() { + // The directory already exists. It can be outside of the parent, + // but there is no race-proof check. + glog.V(4).Infof("Directory %s already exists", pathname) + return nil + } + return &os.PathError{Op: "mkdir", Path: pathname, Err: syscall.ENOTDIR} + } + + // Find all existing directories + existingPath, toCreate, err := findExistingPrefix(base, pathname) + if err != nil { + return fmt.Errorf("error opening directory %s: %s", pathname, err) + } + // Ensure the existing directory is inside allowed base + fullExistingPath, err := filepath.EvalSymlinks(existingPath) + if err != nil { + return fmt.Errorf("error opening directory %s: %s", existingPath, err) + } + if !pathWithinBase(fullExistingPath, base) { + return fmt.Errorf("path %s is outside of allowed base %s", fullExistingPath, err) + } + + glog.V(4).Infof("%q already exists, %q to create", fullExistingPath, filepath.Join(toCreate...)) + parentFD, err := doSafeOpen(fullExistingPath, base) + if err != nil { + return fmt.Errorf("cannot open directory %s: %s", existingPath, err) + } + childFD := -1 + defer func() { + if parentFD != -1 { + if err = syscall.Close(parentFD); err != nil { + glog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", parentFD, pathname, err) + } + } + if childFD != -1 { + if err = syscall.Close(childFD); err != nil { + glog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", childFD, pathname, err) + } + } + }() + + currentPath := fullExistingPath + // create the directories one by one, making sure nobody can change + // created directory into symlink. + for _, dir := range toCreate { + currentPath = filepath.Join(currentPath, dir) + glog.V(4).Infof("Creating %s", dir) + err = syscall.Mkdirat(parentFD, currentPath, uint32(perm)) + if err != nil { + return fmt.Errorf("cannot create directory %s: %s", currentPath, err) + } + // Dive into the created directory + childFD, err := syscall.Openat(parentFD, dir, nofollowFlags, 0) + if err != nil { + return fmt.Errorf("cannot open %s: %s", currentPath, err) + } + // We can be sure that childFD is safe to use. It could be changed + // by user after Mkdirat() and before Openat(), however: + // - it could not be changed to symlink - we use nofollowFlags + // - it could be changed to a file (or device, pipe, socket, ...) + // but either subsequent Mkdirat() fails or we mount this file + // to user's container. Security is no violated in both cases + // and user either gets error or the file that it can already access. + + if err = syscall.Close(parentFD); err != nil { + glog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", parentFD, pathname, err) + } + parentFD = childFD + childFD = -1 + } + + // Everything was created. mkdirat(..., perm) above was affected by current + // umask and we must apply the right permissions to the last directory + // (that's the one that will be available to the container as subpath) + // so user can read/write it. This is the behavior of previous code. + // TODO: chmod all created directories, not just the last one. + // parentFD is the last created directory. + if err = syscall.Fchmod(parentFD, uint32(perm)&uint32(os.ModePerm)); err != nil { + return fmt.Errorf("chmod %q failed: %s", currentPath, err) + } + return nil +} + +// findExistingPrefix finds prefix of pathname that exists. In addition, it +// returns list of remaining directories that don't exist yet. +func findExistingPrefix(base, pathname string) (string, []string, error) { + rel, err := filepath.Rel(base, pathname) + if err != nil { + return base, nil, err + } + dirs := strings.Split(rel, string(filepath.Separator)) + + // Do OpenAt in a loop to find the first non-existing dir. Resolve symlinks. + // This should be faster than looping through all dirs and calling os.Stat() + // on each of them, as the symlinks are resolved only once with OpenAt(). + currentPath := base + fd, err := syscall.Open(currentPath, syscall.O_RDONLY, 0) + if err != nil { + return pathname, nil, fmt.Errorf("error opening %s: %s", currentPath, err) + } + defer func() { + if err = syscall.Close(fd); err != nil { + glog.V(4).Infof("Closing FD %v failed for findExistingPrefix(%v): %v", fd, pathname, err) + } + }() + for i, dir := range dirs { + childFD, err := syscall.Openat(fd, dir, syscall.O_RDONLY, 0) + if err != nil { + if os.IsNotExist(err) { + return currentPath, dirs[i:], nil + } + return base, nil, err + } + if err = syscall.Close(fd); err != nil { + glog.V(4).Infof("Closing FD %v failed for findExistingPrefix(%v): %v", fd, pathname, err) + } + fd = childFD + currentPath = filepath.Join(currentPath, dir) + } + return pathname, []string{}, nil +} + +// This implementation is shared between Linux and NsEnterMounter +// Open path and return its fd. +// Symlinks are disallowed (pathname must already resolve symlinks), +// and the path must be within the base directory. +func doSafeOpen(pathname string, base string) (int, error) { + // Calculate segments to follow + subpath, err := filepath.Rel(base, pathname) + if err != nil { + return -1, err + } + segments := strings.Split(subpath, string(filepath.Separator)) + + // Assumption: base is the only directory that we have under control. + // Base dir is not allowed to be a symlink. + parentFD, err := syscall.Open(base, nofollowFlags, 0) + if err != nil { + return -1, fmt.Errorf("cannot open directory %s: %s", base, err) + } + defer func() { + if parentFD != -1 { + if err = syscall.Close(parentFD); err != nil { + glog.V(4).Infof("Closing FD %v failed for safeopen(%v): %v", parentFD, pathname, err) + } + } + }() + + childFD := -1 + defer func() { + if childFD != -1 { + if err = syscall.Close(childFD); err != nil { + glog.V(4).Infof("Closing FD %v failed for safeopen(%v): %v", childFD, pathname, err) + } + } + }() + + currentPath := base + + // Follow the segments one by one using openat() to make + // sure the user cannot change already existing directories into symlinks. + for _, seg := range segments { + currentPath = filepath.Join(currentPath, seg) + if !pathWithinBase(currentPath, base) { + return -1, fmt.Errorf("path %s is outside of allowed base %s", currentPath, base) + } + + glog.V(5).Infof("Opening path %s", currentPath) + childFD, err = syscall.Openat(parentFD, seg, nofollowFlags, 0) + if err != nil { + return -1, fmt.Errorf("cannot open %s: %s", currentPath, err) + } + + // Close parentFD + if err = syscall.Close(parentFD); err != nil { + return -1, fmt.Errorf("closing fd for %q failed: %v", filepath.Dir(currentPath), err) + } + // Set child to new parent + parentFD = childFD + childFD = -1 + } + + // We made it to the end, return this fd, don't close it + finalFD := parentFD + parentFD = -1 + + return finalFD, nil +} diff --git a/pkg/util/mount/mount_linux_test.go b/pkg/util/mount/mount_linux_test.go index 6d5d6e961fd1..25bdfc6fe6f7 100644 --- a/pkg/util/mount/mount_linux_test.go +++ b/pkg/util/mount/mount_linux_test.go @@ -19,11 +19,17 @@ limitations under the License. package mount import ( + "fmt" "io/ioutil" "os" "path/filepath" "reflect" + "syscall" "testing" + + "strconv" + + "github.com/golang/glog" ) func TestReadProcMountsFrom(t *testing.T) { @@ -322,3 +328,1203 @@ func TestIsSharedFailure(t *testing.T) { } } } + +func TestPathWithinBase(t *testing.T) { + tests := []struct { + name string + fullPath string + basePath string + expected bool + }{ + { + name: "good subpath", + fullPath: "/a/b/c", + basePath: "/a", + expected: true, + }, + { + name: "good subpath 2", + fullPath: "/a/b/c", + basePath: "/a/b", + expected: true, + }, + { + name: "good subpath end slash", + fullPath: "/a/b/c/", + basePath: "/a/b", + expected: true, + }, + { + name: "good subpath backticks", + fullPath: "/a/b/../c", + basePath: "/a", + expected: true, + }, + { + name: "good subpath equal", + fullPath: "/a/b/c", + basePath: "/a/b/c", + expected: true, + }, + { + name: "good subpath equal 2", + fullPath: "/a/b/c/", + basePath: "/a/b/c", + expected: true, + }, + { + name: "good subpath root", + fullPath: "/a", + basePath: "/", + expected: true, + }, + { + name: "bad subpath parent", + fullPath: "/a/b/c", + basePath: "/a/b/c/d", + expected: false, + }, + { + name: "bad subpath outside", + fullPath: "/b/c", + basePath: "/a/b/c", + expected: false, + }, + { + name: "bad subpath prefix", + fullPath: "/a/b/cd", + basePath: "/a/b/c", + expected: false, + }, + { + name: "bad subpath backticks", + fullPath: "/a/../b", + basePath: "/a", + expected: false, + }, + } + for _, test := range tests { + if pathWithinBase(test.fullPath, test.basePath) != test.expected { + t.Errorf("test %q failed: expected %v", test.name, test.expected) + } + + } +} + +func TestSafeMakeDir(t *testing.T) { + defaultPerm := os.FileMode(0750) + tests := []struct { + name string + // Function that prepares directory structure for the test under given + // base. + prepare func(base string) error + path string + checkPath string + expectError bool + }{ + { + "directory-does-not-exist", + func(base string) error { + return nil + }, + "test/directory", + "test/directory", + false, + }, + { + "directory-exists", + func(base string) error { + return os.MkdirAll(filepath.Join(base, "test/directory"), 0750) + }, + "test/directory", + "test/directory", + false, + }, + { + "create-base", + func(base string) error { + return nil + }, + "", + "", + false, + }, + { + "escape-base-using-dots", + func(base string) error { + return nil + }, + "..", + "", + true, + }, + { + "escape-base-using-dots-2", + func(base string) error { + return nil + }, + "test/../../..", + "", + true, + }, + { + "follow-symlinks", + func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "destination"), defaultPerm); err != nil { + return err + } + return os.Symlink("destination", filepath.Join(base, "test")) + }, + "test/directory", + "destination/directory", + false, + }, + { + "follow-symlink-loop", + func(base string) error { + return os.Symlink("test", filepath.Join(base, "test")) + }, + "test/directory", + "", + true, + }, + { + "follow-symlink-multiple follow", + func(base string) error { + /* test1/dir points to test2 and test2/dir points to test1 */ + if err := os.MkdirAll(filepath.Join(base, "test1"), defaultPerm); err != nil { + return err + } + if err := os.MkdirAll(filepath.Join(base, "test2"), defaultPerm); err != nil { + return err + } + if err := os.Symlink(filepath.Join(base, "test2"), filepath.Join(base, "test1/dir")); err != nil { + return err + } + if err := os.Symlink(filepath.Join(base, "test1"), filepath.Join(base, "test2/dir")); err != nil { + return err + } + return nil + }, + "test1/dir/dir/dir/dir/dir/dir/dir/foo", + "test2/foo", + false, + }, + { + "danglink-symlink", + func(base string) error { + return os.Symlink("non-existing", filepath.Join(base, "test")) + }, + "test/directory", + "", + true, + }, + { + "non-directory", + func(base string) error { + return ioutil.WriteFile(filepath.Join(base, "test"), []byte{}, defaultPerm) + }, + "test/directory", + "", + true, + }, + { + "non-directory-final", + func(base string) error { + return ioutil.WriteFile(filepath.Join(base, "test"), []byte{}, defaultPerm) + }, + "test", + "", + true, + }, + { + "escape-with-relative-symlink", + func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "dir"), defaultPerm); err != nil { + return err + } + if err := os.MkdirAll(filepath.Join(base, "exists"), defaultPerm); err != nil { + return err + } + return os.Symlink("../exists", filepath.Join(base, "dir/test")) + }, + "dir/test", + "", + false, + }, + { + "escape-with-relative-symlink-not-exists", + func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "dir"), defaultPerm); err != nil { + return err + } + return os.Symlink("../not-exists", filepath.Join(base, "dir/test")) + }, + "dir/test", + "", + true, + }, + { + "escape-with-symlink", + func(base string) error { + return os.Symlink("/", filepath.Join(base, "test")) + }, + "test/directory", + "", + true, + }, + } + + for _, test := range tests { + glog.V(4).Infof("test %q", test.name) + base, err := ioutil.TempDir("", "safe-make-dir-"+test.name+"-") + if err != nil { + t.Fatalf(err.Error()) + } + test.prepare(base) + pathToCreate := filepath.Join(base, test.path) + err = doSafeMakeDir(pathToCreate, base, defaultPerm) + if err != nil && !test.expectError { + t.Errorf("test %q: %s", test.name, err) + } + if err != nil { + glog.Infof("got error: %s", err) + } + if err == nil && test.expectError { + t.Errorf("test %q: expected error, got none", test.name) + } + + if test.checkPath != "" { + if _, err := os.Stat(filepath.Join(base, test.checkPath)); err != nil { + t.Errorf("test %q: cannot read path %s", test.name, test.checkPath) + } + } + + os.RemoveAll(base) + } + +} + +func validateDirEmpty(dir string) error { + files, err := ioutil.ReadDir(dir) + if err != nil { + return err + } + + if len(files) != 0 { + return fmt.Errorf("Directory %q is not empty", dir) + } + return nil +} + +func validateDirExists(dir string) error { + _, err := ioutil.ReadDir(dir) + if err != nil { + return err + } + return nil +} + +func validateDirNotExists(dir string) error { + _, err := ioutil.ReadDir(dir) + if os.IsNotExist(err) { + return nil + } + if err != nil { + return err + } + return fmt.Errorf("dir %q still exists", dir) +} + +func validateFileExists(file string) error { + if _, err := os.Stat(file); err != nil { + return err + } + return nil +} + +func TestRemoveEmptyDirs(t *testing.T) { + defaultPerm := os.FileMode(0750) + tests := []struct { + name string + // Function that prepares directory structure for the test under given + // base. + prepare func(base string) error + // Function that validates directory structure after the test + validate func(base string) error + baseDir string + endDir string + expectError bool + }{ + { + name: "all-empty", + prepare: func(base string) error { + return os.MkdirAll(filepath.Join(base, "a/b/c"), defaultPerm) + }, + validate: func(base string) error { + return validateDirEmpty(filepath.Join(base, "a")) + }, + baseDir: "a", + endDir: "a/b/c", + expectError: false, + }, + { + name: "dir-not-empty", + prepare: func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "a/b/c"), defaultPerm); err != nil { + return err + } + return os.Mkdir(filepath.Join(base, "a/b/d"), defaultPerm) + }, + validate: func(base string) error { + if err := validateDirNotExists(filepath.Join(base, "a/b/c")); err != nil { + return err + } + return validateDirExists(filepath.Join(base, "a/b")) + }, + baseDir: "a", + endDir: "a/b/c", + expectError: false, + }, + { + name: "path-not-within-base", + prepare: func(base string) error { + return os.MkdirAll(filepath.Join(base, "a/b/c"), defaultPerm) + }, + validate: func(base string) error { + return validateDirExists(filepath.Join(base, "a")) + }, + baseDir: "a", + endDir: "b/c", + expectError: true, + }, + { + name: "path-already-deleted", + prepare: func(base string) error { + return nil + }, + validate: func(base string) error { + return nil + }, + baseDir: "a", + endDir: "a/b/c", + expectError: false, + }, + { + name: "path-not-dir", + prepare: func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "a/b"), defaultPerm); err != nil { + return err + } + return ioutil.WriteFile(filepath.Join(base, "a/b", "c"), []byte{}, defaultPerm) + }, + validate: func(base string) error { + if err := validateDirExists(filepath.Join(base, "a/b")); err != nil { + return err + } + return validateFileExists(filepath.Join(base, "a/b/c")) + }, + baseDir: "a", + endDir: "a/b/c", + expectError: true, + }, + } + + for _, test := range tests { + glog.V(4).Infof("test %q", test.name) + base, err := ioutil.TempDir("", "remove-empty-dirs-"+test.name+"-") + if err != nil { + t.Fatalf(err.Error()) + } + if err = test.prepare(base); err != nil { + os.RemoveAll(base) + t.Fatalf("failed to prepare test %q: %v", test.name, err.Error()) + } + + err = removeEmptyDirs(filepath.Join(base, test.baseDir), filepath.Join(base, test.endDir)) + if err != nil && !test.expectError { + t.Errorf("test %q failed: %v", test.name, err) + } + if err == nil && test.expectError { + t.Errorf("test %q failed: expected error, got success", test.name) + } + + if err = test.validate(base); err != nil { + t.Errorf("test %q failed validation: %v", test.name, err) + } + + os.RemoveAll(base) + } +} + +func TestCleanSubPaths(t *testing.T) { + defaultPerm := os.FileMode(0750) + testVol := "vol1" + + tests := []struct { + name string + // Function that prepares directory structure for the test under given + // base. + prepare func(base string) ([]MountPoint, error) + // Function that validates directory structure after the test + validate func(base string) error + expectError bool + }{ + { + name: "not-exists", + prepare: func(base string) ([]MountPoint, error) { + return nil, nil + }, + validate: func(base string) error { + return nil + }, + expectError: false, + }, + { + name: "subpath-not-mount", + prepare: func(base string) ([]MountPoint, error) { + return nil, os.MkdirAll(filepath.Join(base, containerSubPathDirectoryName, testVol, "container1", "0"), defaultPerm) + }, + validate: func(base string) error { + return validateDirNotExists(filepath.Join(base, containerSubPathDirectoryName)) + }, + expectError: false, + }, + { + name: "subpath-file", + prepare: func(base string) ([]MountPoint, error) { + path := filepath.Join(base, containerSubPathDirectoryName, testVol, "container1") + if err := os.MkdirAll(path, defaultPerm); err != nil { + return nil, err + } + return nil, ioutil.WriteFile(filepath.Join(path, "0"), []byte{}, defaultPerm) + }, + validate: func(base string) error { + return validateDirNotExists(filepath.Join(base, containerSubPathDirectoryName)) + }, + expectError: false, + }, + { + name: "subpath-container-not-dir", + prepare: func(base string) ([]MountPoint, error) { + path := filepath.Join(base, containerSubPathDirectoryName, testVol) + if err := os.MkdirAll(path, defaultPerm); err != nil { + return nil, err + } + return nil, ioutil.WriteFile(filepath.Join(path, "container1"), []byte{}, defaultPerm) + }, + validate: func(base string) error { + return validateDirExists(filepath.Join(base, containerSubPathDirectoryName, testVol)) + }, + expectError: true, + }, + { + name: "subpath-multiple-container-not-dir", + prepare: func(base string) ([]MountPoint, error) { + path := filepath.Join(base, containerSubPathDirectoryName, testVol) + if err := os.MkdirAll(filepath.Join(path, "container1"), defaultPerm); err != nil { + return nil, err + } + return nil, ioutil.WriteFile(filepath.Join(path, "container2"), []byte{}, defaultPerm) + }, + validate: func(base string) error { + path := filepath.Join(base, containerSubPathDirectoryName, testVol) + if err := validateDirNotExists(filepath.Join(path, "container1")); err != nil { + return err + } + return validateFileExists(filepath.Join(path, "container2")) + }, + expectError: true, + }, + { + name: "subpath-mount", + prepare: func(base string) ([]MountPoint, error) { + path := filepath.Join(base, containerSubPathDirectoryName, testVol, "container1", "0") + if err := os.MkdirAll(path, defaultPerm); err != nil { + return nil, err + } + mounts := []MountPoint{{Device: "/dev/sdb", Path: path}} + return mounts, nil + }, + validate: func(base string) error { + return validateDirNotExists(filepath.Join(base, containerSubPathDirectoryName)) + }, + }, + { + name: "subpath-mount-multiple", + prepare: func(base string) ([]MountPoint, error) { + path := filepath.Join(base, containerSubPathDirectoryName, testVol, "container1", "0") + path2 := filepath.Join(base, containerSubPathDirectoryName, testVol, "container1", "1") + path3 := filepath.Join(base, containerSubPathDirectoryName, testVol, "container2", "1") + if err := os.MkdirAll(path, defaultPerm); err != nil { + return nil, err + } + if err := os.MkdirAll(path2, defaultPerm); err != nil { + return nil, err + } + if err := os.MkdirAll(path3, defaultPerm); err != nil { + return nil, err + } + mounts := []MountPoint{ + {Device: "/dev/sdb", Path: path}, + {Device: "/dev/sdb", Path: path3}, + } + return mounts, nil + }, + validate: func(base string) error { + return validateDirNotExists(filepath.Join(base, containerSubPathDirectoryName)) + }, + }, + { + name: "subpath-mount-multiple-vols", + prepare: func(base string) ([]MountPoint, error) { + path := filepath.Join(base, containerSubPathDirectoryName, testVol, "container1", "0") + path2 := filepath.Join(base, containerSubPathDirectoryName, "vol2", "container1", "1") + if err := os.MkdirAll(path, defaultPerm); err != nil { + return nil, err + } + if err := os.MkdirAll(path2, defaultPerm); err != nil { + return nil, err + } + mounts := []MountPoint{ + {Device: "/dev/sdb", Path: path}, + } + return mounts, nil + }, + validate: func(base string) error { + baseSubdir := filepath.Join(base, containerSubPathDirectoryName) + if err := validateDirNotExists(filepath.Join(baseSubdir, testVol)); err != nil { + return err + } + return validateDirExists(baseSubdir) + }, + }, + } + + for _, test := range tests { + glog.V(4).Infof("test %q", test.name) + base, err := ioutil.TempDir("", "clean-subpaths-"+test.name+"-") + if err != nil { + t.Fatalf(err.Error()) + } + mounts, err := test.prepare(base) + if err != nil { + os.RemoveAll(base) + t.Fatalf("failed to prepare test %q: %v", test.name, err.Error()) + } + + fm := &FakeMounter{MountPoints: mounts} + + err = doCleanSubPaths(fm, base, testVol) + if err != nil && !test.expectError { + t.Errorf("test %q failed: %v", test.name, err) + } + if err == nil && test.expectError { + t.Errorf("test %q failed: expected error, got success", test.name) + } + if err = test.validate(base); err != nil { + t.Errorf("test %q failed validation: %v", test.name, err) + } + + os.RemoveAll(base) + } +} + +var ( + testVol = "vol1" + testPod = "pod0" + testContainer = "container0" + testSubpath = 1 +) + +func setupFakeMounter(testMounts []string) *FakeMounter { + mounts := []MountPoint{} + for _, mountPoint := range testMounts { + mounts = append(mounts, MountPoint{Device: "/foo", Path: mountPoint}) + } + return &FakeMounter{MountPoints: mounts} +} + +func getTestPaths(base string) (string, string) { + return filepath.Join(base, testVol), + filepath.Join(base, testPod, containerSubPathDirectoryName, testVol, testContainer, strconv.Itoa(testSubpath)) +} + +func TestBindSubPath(t *testing.T) { + defaultPerm := os.FileMode(0750) + + tests := []struct { + name string + // Function that prepares directory structure for the test under given + // base. + prepare func(base string) ([]string, string, string, error) + expectError bool + }{ + { + name: "subpath-dir", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpath := filepath.Join(volpath, "dir0") + return nil, volpath, subpath, os.MkdirAll(subpath, defaultPerm) + }, + expectError: false, + }, + { + name: "subpath-dir-symlink", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpath := filepath.Join(volpath, "dir0") + if err := os.MkdirAll(subpath, defaultPerm); err != nil { + return nil, "", "", err + } + subpathLink := filepath.Join(volpath, "dirLink") + return nil, volpath, subpath, os.Symlink(subpath, subpathLink) + }, + expectError: false, + }, + { + name: "subpath-file", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpath := filepath.Join(volpath, "file0") + if err := os.MkdirAll(volpath, defaultPerm); err != nil { + return nil, "", "", err + } + return nil, volpath, subpath, ioutil.WriteFile(subpath, []byte{}, defaultPerm) + }, + expectError: false, + }, + { + name: "subpath-not-exists", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpath := filepath.Join(volpath, "file0") + return nil, volpath, subpath, nil + }, + expectError: true, + }, + { + name: "subpath-outside", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpath := filepath.Join(volpath, "dir0") + if err := os.MkdirAll(volpath, defaultPerm); err != nil { + return nil, "", "", err + } + return nil, volpath, subpath, os.Symlink(base, subpath) + }, + expectError: true, + }, + { + name: "subpath-symlink-child-outside", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpathDir := filepath.Join(volpath, "dir0") + subpath := filepath.Join(subpathDir, "child0") + if err := os.MkdirAll(subpathDir, defaultPerm); err != nil { + return nil, "", "", err + } + return nil, volpath, subpath, os.Symlink(base, subpath) + }, + expectError: true, + }, + { + name: "subpath-child-outside-exists", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpathDir := filepath.Join(volpath, "dir0") + child := filepath.Join(base, "child0") + subpath := filepath.Join(subpathDir, "child0") + if err := os.MkdirAll(volpath, defaultPerm); err != nil { + return nil, "", "", err + } + // touch file outside + if err := ioutil.WriteFile(child, []byte{}, defaultPerm); err != nil { + return nil, "", "", err + } + + // create symlink for subpath dir + return nil, volpath, subpath, os.Symlink(base, subpathDir) + }, + expectError: true, + }, + { + name: "subpath-child-outside-not-exists", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpathDir := filepath.Join(volpath, "dir0") + subpath := filepath.Join(subpathDir, "child0") + if err := os.MkdirAll(volpath, defaultPerm); err != nil { + return nil, "", "", err + } + // create symlink for subpath dir + return nil, volpath, subpath, os.Symlink(base, subpathDir) + }, + expectError: true, + }, + { + name: "subpath-child-outside-exists-middle-dir-symlink", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpathDir := filepath.Join(volpath, "dir0") + symlinkDir := filepath.Join(subpathDir, "linkDir0") + child := filepath.Join(base, "child0") + subpath := filepath.Join(symlinkDir, "child0") + if err := os.MkdirAll(subpathDir, defaultPerm); err != nil { + return nil, "", "", err + } + // touch file outside + if err := ioutil.WriteFile(child, []byte{}, defaultPerm); err != nil { + return nil, "", "", err + } + + // create symlink for middle dir + return nil, volpath, subpath, os.Symlink(base, symlinkDir) + }, + expectError: true, + }, + { + name: "subpath-backstepping", + prepare: func(base string) ([]string, string, string, error) { + volpath, _ := getTestPaths(base) + subpath := filepath.Join(volpath, "dir0") + symlinkBase := filepath.Join(volpath, "..") + if err := os.MkdirAll(volpath, defaultPerm); err != nil { + return nil, "", "", err + } + + // create symlink for subpath + return nil, volpath, subpath, os.Symlink(symlinkBase, subpath) + }, + expectError: true, + }, + { + name: "subpath-mountdir-already-exists", + prepare: func(base string) ([]string, string, string, error) { + volpath, subpathMount := getTestPaths(base) + if err := os.MkdirAll(subpathMount, defaultPerm); err != nil { + return nil, "", "", err + } + + subpath := filepath.Join(volpath, "dir0") + return nil, volpath, subpath, os.MkdirAll(subpath, defaultPerm) + }, + expectError: false, + }, + { + name: "subpath-mount-already-exists", + prepare: func(base string) ([]string, string, string, error) { + volpath, subpathMount := getTestPaths(base) + mounts := []string{subpathMount} + if err := os.MkdirAll(subpathMount, defaultPerm); err != nil { + return nil, "", "", err + } + + subpath := filepath.Join(volpath, "dir0") + return mounts, volpath, subpath, os.MkdirAll(subpath, defaultPerm) + }, + expectError: false, + }, + } + + for _, test := range tests { + glog.V(4).Infof("test %q", test.name) + base, err := ioutil.TempDir("", "bind-subpath-"+test.name+"-") + if err != nil { + t.Fatalf(err.Error()) + } + mounts, volPath, subPath, err := test.prepare(base) + if err != nil { + os.RemoveAll(base) + t.Fatalf("failed to prepare test %q: %v", test.name, err.Error()) + } + + fm := setupFakeMounter(mounts) + + subpath := Subpath{ + VolumeMountIndex: testSubpath, + Path: subPath, + VolumeName: testVol, + VolumePath: volPath, + PodDir: filepath.Join(base, "pod0"), + ContainerName: testContainer, + } + + _, subpathMount := getTestPaths(base) + bindPathTarget, err := doBindSubPath(fm, subpath, 1) + if test.expectError { + if err == nil { + t.Errorf("test %q failed: expected error, got success", test.name) + } + if bindPathTarget != "" { + t.Errorf("test %q failed: expected empty bindPathTarget, got %v", test.name, bindPathTarget) + } + if err = validateDirNotExists(subpathMount); err != nil { + t.Errorf("test %q failed: %v", test.name, err) + } + } + if !test.expectError { + if err != nil { + t.Errorf("test %q failed: %v", test.name, err) + } + if bindPathTarget != subpathMount { + t.Errorf("test %q failed: expected bindPathTarget %v, got %v", test.name, subpathMount, bindPathTarget) + } + if err = validateFileExists(subpathMount); err != nil { + t.Errorf("test %q failed: %v", test.name, err) + } + } + + os.RemoveAll(base) + } +} + +func TestParseMountInfo(t *testing.T) { + info := + `62 0 253:0 / / rw,relatime shared:1 - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered +78 62 0:41 / /tmp rw,nosuid,nodev shared:30 - tmpfs tmpfs rw,seclabel +80 62 0:42 / /var/lib/nfs/rpc_pipefs rw,relatime shared:31 - rpc_pipefs sunrpc rw +82 62 0:43 / /var/lib/foo rw,relatime shared:32 - tmpfs tmpfs rw +83 63 0:44 / /var/lib/bar rw,relatime - tmpfs tmpfs rw +227 62 253:0 /var/lib/docker/devicemapper /var/lib/docker/devicemapper rw,relatime - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered +224 62 253:0 /var/lib/docker/devicemapper/test/shared /var/lib/docker/devicemapper/test/shared rw,relatime master:1 shared:44 - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered +76 17 8:1 / /mnt/stateful_partition rw,nosuid,nodev,noexec,relatime - ext4 /dev/sda1 rw,commit=30,data=ordered +80 17 8:1 /var /var rw,nosuid,nodev,noexec,relatime shared:30 - ext4 /dev/sda1 rw,commit=30,data=ordered +189 80 8:1 /var/lib/kubelet /var/lib/kubelet rw,relatime shared:30 - ext4 /dev/sda1 rw,commit=30,data=ordered +818 77 8:40 / /var/lib/kubelet/pods/c25464af-e52e-11e7-ab4d-42010a800002/volumes/kubernetes.io~gce-pd/vol1 rw,relatime shared:290 - ext4 /dev/sdc rw,data=ordered +819 78 8:48 / /var/lib/kubelet/pods/c25464af-e52e-11e7-ab4d-42010a800002/volumes/kubernetes.io~gce-pd/vol1 rw,relatime shared:290 - ext4 /dev/sdd rw,data=ordered +900 100 8:48 /dir1 /var/lib/kubelet/pods/c25464af-e52e-11e7-ab4d-42010a800002/volume-subpaths/vol1/subpath1/0 rw,relatime shared:290 - ext4 /dev/sdd rw,data=ordered +901 101 8:1 /dir1 /var/lib/kubelet/pods/c25464af-e52e-11e7-ab4d-42010a800002/volume-subpaths/vol1/subpath1/1 rw,relatime shared:290 - ext4 /dev/sdd rw,data=ordered +902 102 8:1 /var/lib/kubelet/pods/d4076f24-e53a-11e7-ba15-42010a800002/volumes/kubernetes.io~empty-dir/vol1/dir1 /var/lib/kubelet/pods/d4076f24-e53a-11e7-ba15-42010a800002/volume-subpaths/vol1/subpath1/0 rw,relatime shared:30 - ext4 /dev/sda1 rw,commit=30,data=ordered +903 103 8:1 /var/lib/kubelet/pods/d4076f24-e53a-11e7-ba15-42010a800002/volumes/kubernetes.io~empty-dir/vol2/dir1 /var/lib/kubelet/pods/d4076f24-e53a-11e7-ba15-42010a800002/volume-subpaths/vol1/subpath1/1 rw,relatime shared:30 - ext4 /dev/sda1 rw,commit=30,data=ordered +178 25 253:0 /etc/bar /var/lib/kubelet/pods/12345/volume-subpaths/vol1/subpath1/0 rw,relatime shared:1 - ext4 /dev/sdb2 rw,errors=remount-ro,data=ordered +698 186 0:41 /tmp1/dir1 /var/lib/kubelet/pods/41135147-e697-11e7-9342-42010a800002/volume-subpaths/vol1/subpath1/0 rw shared:26 - tmpfs tmpfs rw +918 77 8:50 / /var/lib/kubelet/pods/2345/volumes/kubernetes.io~gce-pd/vol1 rw,relatime shared:290 - ext4 /dev/sdc rw,data=ordered +919 78 8:58 / /var/lib/kubelet/pods/2345/volumes/kubernetes.io~gce-pd/vol1 rw,relatime shared:290 - ext4 /dev/sdd rw,data=ordered +920 100 8:50 /dir1 /var/lib/kubelet/pods/2345/volume-subpaths/vol1/subpath1/0 rw,relatime shared:290 - ext4 /dev/sdc rw,data=ordered +150 23 1:58 / /media/nfs_vol rw,relatime shared:89 - nfs4 172.18.4.223:/srv/nfs rw,vers=4.0,rsize=524288,wsize=524288,namlen=255,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=172.18.4.223,local_lock=none,addr=172.18.4.223 +151 24 1:58 / /media/nfs_bindmount rw,relatime shared:89 - nfs4 172.18.4.223:/srv/nfs/foo rw,vers=4.0,rsize=524288,wsize=524288,namlen=255,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=172.18.4.223,local_lock=none,addr=172.18.4.223 +134 23 0:58 / /var/lib/kubelet/pods/43219158-e5e1-11e7-a392-0e858b8eaf40/volumes/kubernetes.io~nfs/nfs1 rw,relatime shared:89 - nfs4 172.18.4.223:/srv/nfs rw,vers=4.0,rsize=524288,wsize=524288,namlen=255,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=172.18.4.223,local_lock=none,addr=172.18.4.223 +187 23 0:58 / /var/lib/kubelet/pods/1fc5ea21-eff4-11e7-ac80-0e858b8eaf40/volumes/kubernetes.io~nfs/nfs2 rw,relatime shared:96 - nfs4 172.18.4.223:/srv/nfs2 rw,vers=4.0,rsize=524288,wsize=524288,namlen=255,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=172.18.4.223,local_lock=none,addr=172.18.4.223 +188 24 0:58 / /var/lib/kubelet/pods/43219158-e5e1-11e7-a392-0e858b8eaf40/volume-subpaths/nfs1/subpath1/0 rw,relatime shared:89 - nfs4 172.18.4.223:/srv/nfs/foo rw,vers=4.0,rsize=524288,wsize=524288,namlen=255,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=172.18.4.223,local_lock=none,addr=172.18.4.223 +347 60 0:71 / /var/lib/kubelet/pods/13195d46-f9fa-11e7-bbf1-5254007a695a/volumes/kubernetes.io~nfs/vol2 rw,relatime shared:170 - nfs 172.17.0.3:/exports/2 rw,vers=3,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,mountaddr=172.17.0.3,mountvers=3,mountport=20048,mountproto=udp,local_lock=none,addr=172.17.0.3 +` + tempDir, filename, err := writeFile(info) + if err != nil { + t.Fatalf("cannot create temporary file: %v", err) + } + defer os.RemoveAll(tempDir) + + tests := []struct { + name string + mountPoint string + expectedInfo mountInfo + }{ + { + "simple bind mount", + "/var/lib/kubelet", + mountInfo{ + mountPoint: "/var/lib/kubelet", + optional: []string{"shared:30"}, + }, + }, + } + + infos, err := parseMountInfo(filename) + if err != nil { + t.Fatalf("Cannot parse %s: %s", filename, err) + } + + for _, test := range tests { + found := false + for _, info := range infos { + if info.mountPoint == test.mountPoint { + found = true + if !reflect.DeepEqual(info, test.expectedInfo) { + t.Errorf("Test case %q:\n expected: %+v\n got: %+v", test.name, test.expectedInfo, info) + } + break + } + } + if !found { + t.Errorf("Test case %q: mountPoint %s not found", test.name, test.mountPoint) + } + } +} + +func TestSafeOpen(t *testing.T) { + defaultPerm := os.FileMode(0750) + tests := []struct { + name string + // Function that prepares directory structure for the test under given + // base. + prepare func(base string) error + path string + expectError bool + }{ + { + "directory-does-not-exist", + func(base string) error { + return nil + }, + "test/directory", + true, + }, + { + "directory-exists", + func(base string) error { + return os.MkdirAll(filepath.Join(base, "test/directory"), 0750) + }, + "test/directory", + false, + }, + { + "escape-base-using-dots", + func(base string) error { + return nil + }, + "..", + true, + }, + { + "escape-base-using-dots-2", + func(base string) error { + return os.MkdirAll(filepath.Join(base, "test"), 0750) + }, + "test/../../..", + true, + }, + { + "symlink", + func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "destination"), defaultPerm); err != nil { + return err + } + return os.Symlink("destination", filepath.Join(base, "test")) + }, + "test", + true, + }, + { + "symlink-nested", + func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "dir1/dir2"), defaultPerm); err != nil { + return err + } + return os.Symlink("dir1", filepath.Join(base, "dir1/dir2/test")) + }, + "test", + true, + }, + { + "symlink-loop", + func(base string) error { + return os.Symlink("test", filepath.Join(base, "test")) + }, + "test", + true, + }, + { + "symlink-not-exists", + func(base string) error { + return os.Symlink("non-existing", filepath.Join(base, "test")) + }, + "test", + true, + }, + { + "non-directory", + func(base string) error { + return ioutil.WriteFile(filepath.Join(base, "test"), []byte{}, defaultPerm) + }, + "test/directory", + true, + }, + { + "non-directory-final", + func(base string) error { + return ioutil.WriteFile(filepath.Join(base, "test"), []byte{}, defaultPerm) + }, + "test", + false, + }, + { + "escape-with-relative-symlink", + func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "dir"), defaultPerm); err != nil { + return err + } + if err := os.MkdirAll(filepath.Join(base, "exists"), defaultPerm); err != nil { + return err + } + return os.Symlink("../exists", filepath.Join(base, "dir/test")) + }, + "dir/test", + true, + }, + { + "escape-with-relative-symlink-not-exists", + func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "dir"), defaultPerm); err != nil { + return err + } + return os.Symlink("../not-exists", filepath.Join(base, "dir/test")) + }, + "dir/test", + true, + }, + { + "escape-with-symlink", + func(base string) error { + return os.Symlink("/", filepath.Join(base, "test")) + }, + "test", + true, + }, + } + + for _, test := range tests { + glog.V(4).Infof("test %q", test.name) + base, err := ioutil.TempDir("", "safe-open-"+test.name+"-") + if err != nil { + t.Fatalf(err.Error()) + } + test.prepare(base) + pathToCreate := filepath.Join(base, test.path) + fd, err := doSafeOpen(pathToCreate, base) + if err != nil && !test.expectError { + t.Errorf("test %q: %s", test.name, err) + } + if err != nil { + glog.Infof("got error: %s", err) + } + if err == nil && test.expectError { + t.Errorf("test %q: expected error, got none", test.name) + } + + syscall.Close(fd) + os.RemoveAll(base) + } +} + +func TestFindExistingPrefix(t *testing.T) { + defaultPerm := os.FileMode(0750) + tests := []struct { + name string + // Function that prepares directory structure for the test under given + // base. + prepare func(base string) error + path string + expectedPath string + expectedDirs []string + expectError bool + }{ + { + "directory-does-not-exist", + func(base string) error { + return nil + }, + "directory", + "", + []string{"directory"}, + false, + }, + { + "directory-exists", + func(base string) error { + return os.MkdirAll(filepath.Join(base, "test/directory"), 0750) + }, + "test/directory", + "test/directory", + []string{}, + false, + }, + { + "follow-symlinks", + func(base string) error { + if err := os.MkdirAll(filepath.Join(base, "destination/directory"), defaultPerm); err != nil { + return err + } + return os.Symlink("destination", filepath.Join(base, "test")) + }, + "test/directory", + "test/directory", + []string{}, + false, + }, + { + "follow-symlink-loop", + func(base string) error { + return os.Symlink("test", filepath.Join(base, "test")) + }, + "test/directory", + "", + nil, + true, + }, + { + "follow-symlink-multiple follow", + func(base string) error { + /* test1/dir points to test2 and test2/dir points to test1 */ + if err := os.MkdirAll(filepath.Join(base, "test1"), defaultPerm); err != nil { + return err + } + if err := os.MkdirAll(filepath.Join(base, "test2"), defaultPerm); err != nil { + return err + } + if err := os.Symlink(filepath.Join(base, "test2"), filepath.Join(base, "test1/dir")); err != nil { + return err + } + if err := os.Symlink(filepath.Join(base, "test1"), filepath.Join(base, "test2/dir")); err != nil { + return err + } + return nil + }, + "test1/dir/dir/foo/bar", + "test1/dir/dir", + []string{"foo", "bar"}, + false, + }, + { + "danglink-symlink", + func(base string) error { + return os.Symlink("non-existing", filepath.Join(base, "test")) + }, + // OS returns IsNotExist error both for dangling symlink and for + // non-existing directory. + "test/directory", + "", + []string{"test", "directory"}, + false, + }, + } + + for _, test := range tests { + glog.V(4).Infof("test %q", test.name) + base, err := ioutil.TempDir("", "find-prefix-"+test.name+"-") + if err != nil { + t.Fatalf(err.Error()) + } + test.prepare(base) + path := filepath.Join(base, test.path) + existingPath, dirs, err := findExistingPrefix(base, path) + if err != nil && !test.expectError { + t.Errorf("test %q: %s", test.name, err) + } + if err != nil { + glog.Infof("got error: %s", err) + } + if err == nil && test.expectError { + t.Errorf("test %q: expected error, got none", test.name) + } + + fullExpectedPath := filepath.Join(base, test.expectedPath) + if existingPath != fullExpectedPath { + t.Errorf("test %q: expected path %q, got %q", test.name, fullExpectedPath, existingPath) + } + if !reflect.DeepEqual(dirs, test.expectedDirs) { + t.Errorf("test %q: expected dirs %v, got %v", test.name, test.expectedDirs, dirs) + } + os.RemoveAll(base) + } +} diff --git a/pkg/util/mount/mount_unsupported.go b/pkg/util/mount/mount_unsupported.go index 87d1e3748195..acad6eae032e 100644 --- a/pkg/util/mount/mount_unsupported.go +++ b/pkg/util/mount/mount_unsupported.go @@ -20,6 +20,7 @@ package mount import ( "errors" + "os" ) type Mounter struct { @@ -108,3 +109,15 @@ func (mounter *Mounter) MakeFile(pathname string) error { func (mounter *Mounter) ExistsPath(pathname string) bool { return true } + +func (mounter *Mounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) { + return subPath.Path, nil, nil +} + +func (mounter *Mounter) CleanSubPaths(podDir string, volumeName string) error { + return nil +} + +func (mounter *Mounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error { + return nil +} diff --git a/pkg/util/mount/mount_windows.go b/pkg/util/mount/mount_windows.go index 0c63626ea3a3..9ba4417f9797 100644 --- a/pkg/util/mount/mount_windows.go +++ b/pkg/util/mount/mount_windows.go @@ -260,6 +260,123 @@ func (mounter *Mounter) ExistsPath(pathname string) bool { return true } +// check whether hostPath is within volume path +// this func will lock all intermediate subpath directories, need to close handle outside of this func after container started +func lockAndCheckSubPath(volumePath, hostPath string) ([]uintptr, error) { + if len(volumePath) == 0 || len(hostPath) == 0 { + return []uintptr{}, nil + } + + finalSubPath, err := filepath.EvalSymlinks(hostPath) + if err != nil { + return []uintptr{}, fmt.Errorf("cannot read link %s: %s", hostPath, err) + } + finalVolumePath, err := filepath.EvalSymlinks(volumePath) + if err != nil { + return []uintptr{}, fmt.Errorf("cannot read link %s: %s", volumePath, err) + } + + return lockAndCheckSubPathWithoutSymlink(finalVolumePath, finalSubPath) +} + +// lock all intermediate subPath directories and check they are all within volumePath +// volumePath & subPath should not contain any symlink, otherwise it will return error +func lockAndCheckSubPathWithoutSymlink(volumePath, subPath string) ([]uintptr, error) { + if len(volumePath) == 0 || len(subPath) == 0 { + return []uintptr{}, nil + } + + // get relative path to volumePath + relSubPath, err := filepath.Rel(volumePath, subPath) + if err != nil { + return []uintptr{}, fmt.Errorf("Rel(%s, %s) error: %v", volumePath, subPath, err) + } + if strings.HasPrefix(relSubPath, "..") { + return []uintptr{}, fmt.Errorf("SubPath %q not within volume path %q", subPath, volumePath) + } + + if relSubPath == "." { + // volumePath and subPath are equal + return []uintptr{}, nil + } + + fileHandles := []uintptr{} + var errorResult error + + currentFullPath := volumePath + dirs := strings.Split(relSubPath, string(os.PathSeparator)) + for _, dir := range dirs { + // lock intermediate subPath directory first + currentFullPath = filepath.Join(currentFullPath, dir) + handle, err := lockPath(currentFullPath) + if err != nil { + errorResult = fmt.Errorf("cannot lock path %s: %s", currentFullPath, err) + break + } + fileHandles = append(fileHandles, handle) + + // make sure intermediate subPath directory does not contain symlink any more + stat, err := os.Lstat(currentFullPath) + if err != nil { + errorResult = fmt.Errorf("Lstat(%q) error: %v", currentFullPath, err) + break + } + if stat.Mode()&os.ModeSymlink != 0 { + errorResult = fmt.Errorf("subpath %q is an unexpected symlink after EvalSymlinks", currentFullPath) + break + } + + if !pathWithinBase(currentFullPath, volumePath) { + errorResult = fmt.Errorf("SubPath %q not within volume path %q", currentFullPath, volumePath) + break + } + } + + return fileHandles, errorResult +} + +// unlockPath unlock directories +func unlockPath(fileHandles []uintptr) { + if fileHandles != nil { + for _, handle := range fileHandles { + syscall.CloseHandle(syscall.Handle(handle)) + } + } +} + +// lockPath locks a directory or symlink, return handle, exec "syscall.CloseHandle(handle)" to unlock the path +func lockPath(path string) (uintptr, error) { + if len(path) == 0 { + return uintptr(syscall.InvalidHandle), syscall.ERROR_FILE_NOT_FOUND + } + pathp, err := syscall.UTF16PtrFromString(path) + if err != nil { + return uintptr(syscall.InvalidHandle), err + } + access := uint32(syscall.GENERIC_READ) + sharemode := uint32(syscall.FILE_SHARE_READ) + createmode := uint32(syscall.OPEN_EXISTING) + flags := uint32(syscall.FILE_FLAG_BACKUP_SEMANTICS | syscall.FILE_FLAG_OPEN_REPARSE_POINT) + fd, err := syscall.CreateFile(pathp, access, sharemode, nil, createmode, flags, 0) + return uintptr(fd), err +} + +// Lock all directories in subPath and check they're not symlinks. +func (mounter *Mounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) { + handles, err := lockAndCheckSubPath(subPath.VolumePath, subPath.Path) + + // Unlock the directories when the container starts + cleanupAction = func() { + unlockPath(handles) + } + return subPath.Path, cleanupAction, err +} + +// No bind-mounts for subpaths are necessary on Windows +func (mounter *Mounter) CleanSubPaths(podDir string, volumeName string) error { + return nil +} + func (mounter *SafeFormatAndMount) formatAndMount(source string, target string, fstype string, options []string) error { // Try to mount the disk glog.V(4).Infof("Attempting to formatAndMount disk: %s %s %s", fstype, source, target) @@ -344,3 +461,120 @@ func getAllParentLinks(path string) ([]string, error) { return links, nil } + +// SafeMakeDir makes sure that the created directory does not escape given base directory mis-using symlinks. +func (mounter *Mounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error { + return doSafeMakeDir(pathname, base, perm) +} + +func doSafeMakeDir(pathname string, base string, perm os.FileMode) error { + glog.V(4).Infof("Creating directory %q within base %q", pathname, base) + + if !pathWithinBase(pathname, base) { + return fmt.Errorf("path %s is outside of allowed base %s", pathname, base) + } + + // Quick check if the directory already exists + s, err := os.Stat(pathname) + if err == nil { + // Path exists + if s.IsDir() { + // The directory already exists. It can be outside of the parent, + // but there is no race-proof check. + glog.V(4).Infof("Directory %s already exists", pathname) + return nil + } + return &os.PathError{Op: "mkdir", Path: pathname, Err: syscall.ENOTDIR} + } + + // Find all existing directories + existingPath, toCreate, err := findExistingPrefix(base, pathname) + if err != nil { + return fmt.Errorf("error opening directory %s: %s", pathname, err) + } + if len(toCreate) == 0 { + return nil + } + + // Ensure the existing directory is inside allowed base + fullExistingPath, err := filepath.EvalSymlinks(existingPath) + if err != nil { + return fmt.Errorf("error opening existing directory %s: %s", existingPath, err) + } + fullBasePath, err := filepath.EvalSymlinks(base) + if err != nil { + return fmt.Errorf("cannot read link %s: %s", base, err) + } + if !pathWithinBase(fullExistingPath, fullBasePath) { + return fmt.Errorf("path %s is outside of allowed base %s", fullExistingPath, err) + } + + // lock all intermediate directories from fullBasePath to fullExistingPath (top to bottom) + fileHandles, err := lockAndCheckSubPathWithoutSymlink(fullBasePath, fullExistingPath) + defer unlockPath(fileHandles) + if err != nil { + return err + } + + glog.V(4).Infof("%q already exists, %q to create", fullExistingPath, filepath.Join(toCreate...)) + currentPath := fullExistingPath + // create the directories one by one, making sure nobody can change + // created directory into symlink by lock that directory immediately + for _, dir := range toCreate { + currentPath = filepath.Join(currentPath, dir) + glog.V(4).Infof("Creating %s", dir) + if err := os.Mkdir(currentPath, perm); err != nil { + return fmt.Errorf("cannot create directory %s: %s", currentPath, err) + } + handle, err := lockPath(currentPath) + if err != nil { + return fmt.Errorf("cannot lock path %s: %s", currentPath, err) + } + defer syscall.CloseHandle(syscall.Handle(handle)) + // make sure newly created directory does not contain symlink after lock + stat, err := os.Lstat(currentPath) + if err != nil { + return fmt.Errorf("Lstat(%q) error: %v", currentPath, err) + } + if stat.Mode()&os.ModeSymlink != 0 { + return fmt.Errorf("subpath %q is an unexpected symlink after Mkdir", currentPath) + } + } + + return nil +} + +// findExistingPrefix finds prefix of pathname that exists. In addition, it +// returns list of remaining directories that don't exist yet. +func findExistingPrefix(base, pathname string) (string, []string, error) { + rel, err := filepath.Rel(base, pathname) + if err != nil { + return base, nil, err + } + + if strings.HasPrefix(rel, "..") { + return base, nil, fmt.Errorf("pathname(%s) is not within base(%s)", pathname, base) + } + + if rel == "." { + // base and pathname are equal + return pathname, []string{}, nil + } + + dirs := strings.Split(rel, string(filepath.Separator)) + + parent := base + currentPath := base + for i, dir := range dirs { + parent = currentPath + currentPath = filepath.Join(parent, dir) + if _, err := os.Lstat(currentPath); err != nil { + if os.IsNotExist(err) { + return parent, dirs[i:], nil + } + return base, nil, err + } + } + + return pathname, []string{}, nil +} diff --git a/pkg/util/mount/mount_windows_test.go b/pkg/util/mount/mount_windows_test.go index 5855ede9adb7..76ef08ccc9d3 100644 --- a/pkg/util/mount/mount_windows_test.go +++ b/pkg/util/mount/mount_windows_test.go @@ -20,8 +20,12 @@ package mount import ( "fmt" + "os" "os/exec" + "path/filepath" "testing" + + "github.com/stretchr/testify/assert" ) func TestNormalizeWindowsPath(t *testing.T) { @@ -132,3 +136,413 @@ func TestGetMountRefs(t *testing.T) { } } } + +func TestDoSafeMakeDir(t *testing.T) { + const testingVolumePath = `c:\tmp\DoSafeMakeDirTest` + os.MkdirAll(testingVolumePath, 0755) + defer os.RemoveAll(testingVolumePath) + + tests := []struct { + volumePath string + subPath string + expectError bool + symlinkTarget string + }{ + { + volumePath: testingVolumePath, + subPath: ``, + expectError: true, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `x`), + expectError: false, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a\b\c\d`), + expectError: false, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `symlink`), + expectError: false, + symlinkTarget: `c:\tmp`, + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `symlink\c\d`), + expectError: true, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `symlink\y926`), + expectError: true, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a\b\symlink`), + expectError: false, + symlinkTarget: `c:\tmp`, + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a\x\symlink`), + expectError: false, + symlinkTarget: filepath.Join(testingVolumePath, `a`), + }, + } + + for _, test := range tests { + if len(test.volumePath) > 0 && len(test.subPath) > 0 && len(test.symlinkTarget) > 0 { + // make all parent sub directories + if parent := filepath.Dir(test.subPath); parent != "." { + os.MkdirAll(parent, 0755) + } + + // make last element as symlink + linkPath := test.subPath + if _, err := os.Stat(linkPath); err != nil && os.IsNotExist(err) { + if err := makeLink(linkPath, test.symlinkTarget); err != nil { + t.Fatalf("unexpected error: %v", fmt.Errorf("mklink link(%q) target(%q) error: %q", linkPath, test.symlinkTarget, err)) + } + } + } + + err := doSafeMakeDir(test.subPath, test.volumePath, os.FileMode(0755)) + if test.expectError { + assert.NotNil(t, err, "Expect error during doSafeMakeDir(%s, %s)", test.subPath, test.volumePath) + continue + } + assert.Nil(t, err, "Expect no error during doSafeMakeDir(%s, %s)", test.subPath, test.volumePath) + if _, err := os.Stat(test.subPath); os.IsNotExist(err) { + t.Errorf("subPath should exists after doSafeMakeDir(%s, %s)", test.subPath, test.volumePath) + } + } +} + +func TestLockAndCheckSubPath(t *testing.T) { + const testingVolumePath = `c:\tmp\LockAndCheckSubPathTest` + + tests := []struct { + volumePath string + subPath string + expectedHandleCount int + expectError bool + symlinkTarget string + }{ + { + volumePath: `c:\`, + subPath: ``, + expectedHandleCount: 0, + expectError: false, + symlinkTarget: "", + }, + { + volumePath: ``, + subPath: `a`, + expectedHandleCount: 0, + expectError: false, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a`), + expectedHandleCount: 1, + expectError: false, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a\b\c\d`), + expectedHandleCount: 4, + expectError: false, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `symlink`), + expectedHandleCount: 0, + expectError: true, + symlinkTarget: `c:\tmp`, + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a\b\c\symlink`), + expectedHandleCount: 0, + expectError: true, + symlinkTarget: `c:\tmp`, + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a\b\c\d\symlink`), + expectedHandleCount: 2, + expectError: false, + symlinkTarget: filepath.Join(testingVolumePath, `a\b`), + }, + } + + for _, test := range tests { + if len(test.volumePath) > 0 && len(test.subPath) > 0 { + os.MkdirAll(test.volumePath, 0755) + if len(test.symlinkTarget) == 0 { + // make all intermediate sub directories + os.MkdirAll(test.subPath, 0755) + } else { + // make all parent sub directories + if parent := filepath.Dir(test.subPath); parent != "." { + os.MkdirAll(parent, 0755) + } + + // make last element as symlink + linkPath := test.subPath + if _, err := os.Stat(linkPath); err != nil && os.IsNotExist(err) { + if err := makeLink(linkPath, test.symlinkTarget); err != nil { + t.Fatalf("unexpected error: %v", fmt.Errorf("mklink link(%q) target(%q) error: %q", linkPath, test.symlinkTarget, err)) + } + } + } + } + + fileHandles, err := lockAndCheckSubPath(test.volumePath, test.subPath) + unlockPath(fileHandles) + assert.Equal(t, test.expectedHandleCount, len(fileHandles)) + if test.expectError { + assert.NotNil(t, err, "Expect error during LockAndCheckSubPath(%s, %s)", test.volumePath, test.subPath) + continue + } + assert.Nil(t, err, "Expect no error during LockAndCheckSubPath(%s, %s)", test.volumePath, test.subPath) + } + + // remove dir will happen after closing all file handles + assert.Nil(t, os.RemoveAll(testingVolumePath), "Expect no error during remove dir %s", testingVolumePath) +} + +func TestLockAndCheckSubPathWithoutSymlink(t *testing.T) { + const testingVolumePath = `c:\tmp\LockAndCheckSubPathWithoutSymlinkTest` + + tests := []struct { + volumePath string + subPath string + expectedHandleCount int + expectError bool + symlinkTarget string + }{ + { + volumePath: `c:\`, + subPath: ``, + expectedHandleCount: 0, + expectError: false, + symlinkTarget: "", + }, + { + volumePath: ``, + subPath: `a`, + expectedHandleCount: 0, + expectError: false, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a`), + expectedHandleCount: 1, + expectError: false, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a\b\c\d`), + expectedHandleCount: 4, + expectError: false, + symlinkTarget: "", + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `symlink`), + expectedHandleCount: 1, + expectError: true, + symlinkTarget: `c:\tmp`, + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a\b\c\symlink`), + expectedHandleCount: 4, + expectError: true, + symlinkTarget: `c:\tmp`, + }, + { + volumePath: testingVolumePath, + subPath: filepath.Join(testingVolumePath, `a\b\c\d\symlink`), + expectedHandleCount: 5, + expectError: true, + symlinkTarget: filepath.Join(testingVolumePath, `a\b`), + }, + } + + for _, test := range tests { + if len(test.volumePath) > 0 && len(test.subPath) > 0 { + os.MkdirAll(test.volumePath, 0755) + if len(test.symlinkTarget) == 0 { + // make all intermediate sub directories + os.MkdirAll(test.subPath, 0755) + } else { + // make all parent sub directories + if parent := filepath.Dir(test.subPath); parent != "." { + os.MkdirAll(parent, 0755) + } + + // make last element as symlink + linkPath := test.subPath + if _, err := os.Stat(linkPath); err != nil && os.IsNotExist(err) { + if err := makeLink(linkPath, test.symlinkTarget); err != nil { + t.Fatalf("unexpected error: %v", fmt.Errorf("mklink link(%q) target(%q) error: %q", linkPath, test.symlinkTarget, err)) + } + } + } + } + + fileHandles, err := lockAndCheckSubPathWithoutSymlink(test.volumePath, test.subPath) + unlockPath(fileHandles) + assert.Equal(t, test.expectedHandleCount, len(fileHandles)) + if test.expectError { + assert.NotNil(t, err, "Expect error during LockAndCheckSubPath(%s, %s)", test.volumePath, test.subPath) + continue + } + assert.Nil(t, err, "Expect no error during LockAndCheckSubPath(%s, %s)", test.volumePath, test.subPath) + } + + // remove dir will happen after closing all file handles + assert.Nil(t, os.RemoveAll(testingVolumePath), "Expect no error during remove dir %s", testingVolumePath) +} + +func TestFindExistingPrefix(t *testing.T) { + const testingVolumePath = `c:\tmp\FindExistingPrefixTest` + + tests := []struct { + base string + pathname string + expectError bool + expectedExistingPath string + expectedToCreateDirs []string + createSubPathBeforeTest bool + }{ + { + base: `c:\tmp\a`, + pathname: `c:\tmp\b`, + expectError: true, + expectedExistingPath: "", + expectedToCreateDirs: []string{}, + createSubPathBeforeTest: false, + }, + { + base: ``, + pathname: `c:\tmp\b`, + expectError: true, + expectedExistingPath: "", + expectedToCreateDirs: []string{}, + createSubPathBeforeTest: false, + }, + { + base: `c:\tmp\a`, + pathname: `d:\tmp\b`, + expectError: true, + expectedExistingPath: "", + expectedToCreateDirs: []string{}, + createSubPathBeforeTest: false, + }, + { + base: testingVolumePath, + pathname: testingVolumePath, + expectError: false, + expectedExistingPath: testingVolumePath, + expectedToCreateDirs: []string{}, + createSubPathBeforeTest: false, + }, + { + base: testingVolumePath, + pathname: filepath.Join(testingVolumePath, `a\b`), + expectError: false, + expectedExistingPath: filepath.Join(testingVolumePath, `a\b`), + expectedToCreateDirs: []string{}, + createSubPathBeforeTest: true, + }, + { + base: testingVolumePath, + pathname: filepath.Join(testingVolumePath, `a\b\c\`), + expectError: false, + expectedExistingPath: filepath.Join(testingVolumePath, `a\b`), + expectedToCreateDirs: []string{`c`}, + createSubPathBeforeTest: false, + }, + { + base: testingVolumePath, + pathname: filepath.Join(testingVolumePath, `a\b\c\d`), + expectError: false, + expectedExistingPath: filepath.Join(testingVolumePath, `a\b`), + expectedToCreateDirs: []string{`c`, `d`}, + createSubPathBeforeTest: false, + }, + } + + for _, test := range tests { + if test.createSubPathBeforeTest { + os.MkdirAll(test.pathname, 0755) + } + + existingPath, toCreate, err := findExistingPrefix(test.base, test.pathname) + if test.expectError { + assert.NotNil(t, err, "Expect error during findExistingPrefix(%s, %s)", test.base, test.pathname) + continue + } + assert.Nil(t, err, "Expect no error during findExistingPrefix(%s, %s)", test.base, test.pathname) + + assert.Equal(t, test.expectedExistingPath, existingPath, "Expect result not equal with findExistingPrefix(%s, %s) return: %q, expected: %q", + test.base, test.pathname, existingPath, test.expectedExistingPath) + + assert.Equal(t, test.expectedToCreateDirs, toCreate, "Expect result not equal with findExistingPrefix(%s, %s) return: %q, expected: %q", + test.base, test.pathname, toCreate, test.expectedToCreateDirs) + + } + // remove dir will happen after closing all file handles + assert.Nil(t, os.RemoveAll(testingVolumePath), "Expect no error during remove dir %s", testingVolumePath) +} + +func TestPathWithinBase(t *testing.T) { + tests := []struct { + fullPath string + basePath string + expectedResult bool + }{ + { + fullPath: `c:\tmp\a\b\c`, + basePath: `c:\tmp`, + expectedResult: true, + }, + { + fullPath: `c:\tmp1`, + basePath: `c:\tmp2`, + expectedResult: false, + }, + { + fullPath: `c:\tmp`, + basePath: `c:\tmp`, + expectedResult: true, + }, + { + fullPath: `c:\tmp`, + basePath: `c:\tmp\a\b\c`, + expectedResult: false, + }, + } + + for _, test := range tests { + result := pathWithinBase(test.fullPath, test.basePath) + assert.Equal(t, result, test.expectedResult, "Expect result not equal with pathWithinBase(%s, %s) return: %q, expected: %q", + test.fullPath, test.basePath, result, test.expectedResult) + } +} diff --git a/pkg/util/mount/nsenter_mount.go b/pkg/util/mount/nsenter_mount.go index 99e81837fdec..dd2ad3c64040 100644 --- a/pkg/util/mount/nsenter_mount.go +++ b/pkg/util/mount/nsenter_mount.go @@ -22,9 +22,12 @@ import ( "fmt" "os" "path/filepath" + "regexp" + "strconv" "strings" "github.com/golang/glog" + utilio "k8s.io/kubernetes/pkg/util/io" "k8s.io/kubernetes/pkg/util/nsenter" ) @@ -33,6 +36,13 @@ const ( hostProcMountsPath = "/rootfs/proc/1/mounts" // hostProcMountinfoPath is the default mount info path for rootfs hostProcMountinfoPath = "/rootfs/proc/1/mountinfo" + // hostProcSelfStatusPath is the default path to /proc/self/status on the host + hostProcSelfStatusPath = "/rootfs/proc/self/status" +) + +var ( + // pidRegExp matches "Pid: " in /proc/self/status + pidRegExp = regexp.MustCompile(`\nPid:\t([0-9]*)\n`) ) // Currently, all docker containers receive their own mount namespaces. @@ -270,3 +280,41 @@ func (mounter *NsenterMounter) ExistsPath(pathname string) bool { } return false } + +func (mounter *NsenterMounter) CleanSubPaths(podDir string, volumeName string) error { + return doCleanSubPaths(mounter, podDir, volumeName) +} + +// getPidOnHost returns kubelet's pid in the host pid namespace +func (mounter *NsenterMounter) getPidOnHost(procStatusPath string) (int, error) { + // Get the PID from /rootfs/proc/self/status + statusBytes, err := utilio.ConsistentRead(procStatusPath, maxListTries) + if err != nil { + return 0, fmt.Errorf("error reading %s: %s", procStatusPath, err) + } + matches := pidRegExp.FindSubmatch(statusBytes) + if len(matches) < 2 { + return 0, fmt.Errorf("cannot parse %s: no Pid:", procStatusPath) + } + return strconv.Atoi(string(matches[1])) +} + +func (mounter *NsenterMounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) { + hostPid, err := mounter.getPidOnHost(hostProcSelfStatusPath) + if err != nil { + return "", nil, err + } + glog.V(4).Infof("Kubelet's PID on the host is %d", hostPid) + + // Bind-mount the subpath to avoid using symlinks in subpaths. + newHostPath, err = doBindSubPath(mounter, subPath, hostPid) + + // There is no action when the container starts. Bind-mount will be cleaned + // when container stops by CleanSubPaths. + cleanupAction = nil + return newHostPath, cleanupAction, err +} + +func (mounter *NsenterMounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error { + return doSafeMakeDir(pathname, base, perm) +} diff --git a/pkg/util/mount/nsenter_mount_test.go b/pkg/util/mount/nsenter_mount_test.go index 946c0feb2d34..2aee4ad5f9d1 100644 --- a/pkg/util/mount/nsenter_mount_test.go +++ b/pkg/util/mount/nsenter_mount_test.go @@ -18,7 +18,13 @@ limitations under the License. package mount -import "testing" +import ( + "io/ioutil" + "os" + "path" + "strconv" + "testing" +) func TestParseFindMnt(t *testing.T) { tests := []struct { @@ -65,3 +71,121 @@ func TestParseFindMnt(t *testing.T) { } } } + +func TestGetPidOnHost(t *testing.T) { + tempDir, err := ioutil.TempDir("", "get_pid_on_host_tests") + if err != nil { + t.Fatalf(err.Error()) + } + defer os.RemoveAll(tempDir) + + tests := []struct { + name string + procFile string + expectedPid int + expectError bool + }{ + { + name: "valid status file", + procFile: `Name: cat +Umask: 0002 +State: R (running) +Tgid: 15041 +Ngid: 0 +Pid: 15041 +PPid: 22699 +TracerPid: 0 +Uid: 1000 1000 1000 1000 +Gid: 1000 1000 1000 1000 +FDSize: 256 +Groups: 10 135 156 157 158 973 984 1000 1001 +NStgid: 15041 +NSpid: 15041 +NSpgid: 15041 +NSsid: 22699 +VmPeak: 115016 kB +VmSize: 115016 kB +VmLck: 0 kB +VmPin: 0 kB +VmHWM: 816 kB +VmRSS: 816 kB +RssAnon: 64 kB +RssFile: 752 kB +RssShmem: 0 kB +VmData: 312 kB +VmStk: 136 kB +VmExe: 32 kB +VmLib: 2060 kB +VmPTE: 44 kB +VmPMD: 12 kB +VmSwap: 0 kB +HugetlbPages: 0 kB +Threads: 1 +SigQ: 2/60752 +SigPnd: 0000000000000000 +ShdPnd: 0000000000000000 +SigBlk: 0000000000000000 +SigIgn: 0000000000000000 +SigCgt: 0000000000000000 +CapInh: 0000000000000000 +CapPrm: 0000000000000000 +CapEff: 0000000000000000 +CapBnd: 0000003fffffffff +CapAmb: 0000000000000000 +NoNewPrivs: 0 +Seccomp: 0 +Cpus_allowed: ff +Cpus_allowed_list: 0-7 +Mems_allowed: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001 +Mems_allowed_list: 0 +voluntary_ctxt_switches: 0 +nonvoluntary_ctxt_switches: 0 +`, + expectedPid: 15041, + }, + { + name: "no Pid:", + procFile: `Name: cat +Umask: 0002 +State: R (running) +Tgid: 15041 +Ngid: 0 +PPid: 22699 +`, + expectedPid: 0, + expectError: true, + }, + { + name: "invalid Pid:", + procFile: `Name: cat +Umask: 0002 +State: R (running) +Tgid: 15041 +Ngid: 0 +Pid: invalid +PPid: 22699 +`, + expectedPid: 0, + expectError: true, + }, + } + + for i, test := range tests { + filename := path.Join(tempDir, strconv.Itoa(i)) + err := ioutil.WriteFile(filename, []byte(test.procFile), 0666) + if err != nil { + t.Fatalf(err.Error()) + } + mounter := NsenterMounter{} + pid, err := mounter.getPidOnHost(filename) + if err != nil && !test.expectError { + t.Errorf("Test %q: unexpected error: %s", test.name, err) + } + if err == nil && test.expectError { + t.Errorf("Test %q: expected error, got none", test.name) + } + if pid != test.expectedPid { + t.Errorf("Test %q: expected pid %d, got %d", test.name, test.expectedPid, pid) + } + } +} diff --git a/pkg/util/mount/nsenter_mount_unsupported.go b/pkg/util/mount/nsenter_mount_unsupported.go index f4eb692f9584..4b29f0ba420d 100644 --- a/pkg/util/mount/nsenter_mount_unsupported.go +++ b/pkg/util/mount/nsenter_mount_unsupported.go @@ -20,6 +20,7 @@ package mount import ( "errors" + "os" ) type NsenterMounter struct{} @@ -85,3 +86,15 @@ func (*NsenterMounter) MakeFile(pathname string) error { func (*NsenterMounter) ExistsPath(pathname string) bool { return true } + +func (*NsenterMounter) SafeMakeDir(pathname string, base string, perm os.FileMode) error { + return nil +} + +func (*NsenterMounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) { + return subPath.Path, nil, nil +} + +func (*NsenterMounter) CleanSubPaths(podDir string, volumeName string) error { + return nil +} diff --git a/pkg/util/removeall/removeall_test.go b/pkg/util/removeall/removeall_test.go index 22bdd092ed36..99eaf7c46ee9 100644 --- a/pkg/util/removeall/removeall_test.go +++ b/pkg/util/removeall/removeall_test.go @@ -79,6 +79,18 @@ func (mounter *fakeMounter) ExistsPath(pathname string) bool { return true } +func (mounter *fakeMounter) PrepareSafeSubpath(subPath mount.Subpath) (newHostPath string, cleanupAction func(), err error) { + return "", nil, nil +} + +func (mounter *fakeMounter) CleanSubPaths(_, _ string) error { + return nil +} + +func (mounter *fakeMounter) SafeMakeDir(_, _ string, _ os.FileMode) error { + return nil +} + func (mounter *fakeMounter) IsLikelyNotMountPoint(file string) (bool, error) { name := path.Base(file) if strings.HasPrefix(name, "mount") { diff --git a/pkg/volume/host_path/host_path_test.go b/pkg/volume/host_path/host_path_test.go index 664d0e6b9fbf..b5220ca47e33 100644 --- a/pkg/volume/host_path/host_path_test.go +++ b/pkg/volume/host_path/host_path_test.go @@ -376,6 +376,18 @@ func (fftc *fakeFileTypeChecker) GetFileType(_ string) (utilmount.FileType, erro return utilmount.FileType(fftc.desiredType), nil } +func (fftc *fakeFileTypeChecker) PrepareSafeSubpath(subPath utilmount.Subpath) (newHostPath string, cleanupAction func(), err error) { + return "", nil, nil +} + +func (fftc *fakeFileTypeChecker) CleanSubPaths(_, _ string) error { + return nil +} + +func (fftc *fakeFileTypeChecker) SafeMakeDir(_, _ string, _ os.FileMode) error { + return nil +} + func setUp() error { err := os.MkdirAll("/tmp/ExistingFolder", os.FileMode(0755)) if err != nil { diff --git a/pkg/volume/util/operationexecutor/operation_executor.go b/pkg/volume/util/operationexecutor/operation_executor.go index 9aa04a06accf..c586b959ade0 100644 --- a/pkg/volume/util/operationexecutor/operation_executor.go +++ b/pkg/volume/util/operationexecutor/operation_executor.go @@ -112,7 +112,7 @@ type OperationExecutor interface { // For 'Block' volumeMode, this method unmaps symbolic link to the volume // from both the pod device map path in volumeToUnmount and global map path. // And then, updates the actual state of the world to reflect that. - UnmountVolume(volumeToUnmount MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater) error + UnmountVolume(volumeToUnmount MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater, podsDir string) error // If a volume has 'Filesystem' volumeMode, UnmountDevice unmounts the // volumes global mount path from the device (for attachable volumes only, @@ -746,7 +746,8 @@ func (oe *operationExecutor) MountVolume( func (oe *operationExecutor) UnmountVolume( volumeToUnmount MountedVolume, - actualStateOfWorld ActualStateOfWorldMounterUpdater) error { + actualStateOfWorld ActualStateOfWorldMounterUpdater, + podsDir string) error { fsVolume, err := util.CheckVolumeModeFilesystem(volumeToUnmount.VolumeSpec) if err != nil { return err @@ -756,7 +757,7 @@ func (oe *operationExecutor) UnmountVolume( // Filesystem volume case // Unmount a volume if a volume is mounted generatedOperations, err = oe.operationGenerator.GenerateUnmountVolumeFunc( - volumeToUnmount, actualStateOfWorld) + volumeToUnmount, actualStateOfWorld, podsDir) } else { // Block volume case // Unmap a volume if a volume is mapped diff --git a/pkg/volume/util/operationexecutor/operation_executor_test.go b/pkg/volume/util/operationexecutor/operation_executor_test.go index 2453a6e630f4..0551c4733fbf 100644 --- a/pkg/volume/util/operationexecutor/operation_executor_test.go +++ b/pkg/volume/util/operationexecutor/operation_executor_test.go @@ -124,7 +124,7 @@ func TestOperationExecutor_UnmountVolume_ConcurrentUnmountForAllPlugins(t *testi PodUID: pod.UID, } } - oe.UnmountVolume(volumesToUnmount[i], nil /* actualStateOfWorldMounterUpdater */) + oe.UnmountVolume(volumesToUnmount[i], nil /* actualStateOfWorldMounterUpdater */, "" /*podsDir*/) } // Assert @@ -318,7 +318,7 @@ func TestOperationExecutor_UnmountVolume_ConcurrentUnmountForAllPlugins_VolumeMo VolumeSpec: tmpSpec, } } - oe.UnmountVolume(volumesToUnmount[i], nil /* actualStateOfWorldMounterUpdater */) + oe.UnmountVolume(volumesToUnmount[i], nil /* actualStateOfWorldMounterUpdater */, "" /* podsDir */) } // Assert @@ -372,7 +372,7 @@ func (fopg *fakeOperationGenerator) GenerateMountVolumeFunc(waitForAttachTimeout OperationFunc: opFunc, }, nil } -func (fopg *fakeOperationGenerator) GenerateUnmountVolumeFunc(volumeToUnmount MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater) (volumetypes.GeneratedOperations, error) { +func (fopg *fakeOperationGenerator) GenerateUnmountVolumeFunc(volumeToUnmount MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater, podsDir string) (volumetypes.GeneratedOperations, error) { opFunc := func() (error, error) { startOperationAndBlock(fopg.ch, fopg.quit) return nil, nil diff --git a/pkg/volume/util/operationexecutor/operation_generator.go b/pkg/volume/util/operationexecutor/operation_generator.go index 21e4bbd04b73..f985055fcdd8 100644 --- a/pkg/volume/util/operationexecutor/operation_generator.go +++ b/pkg/volume/util/operationexecutor/operation_generator.go @@ -18,6 +18,7 @@ package operationexecutor import ( "fmt" + "path" "strings" "time" @@ -85,7 +86,7 @@ type OperationGenerator interface { GenerateMountVolumeFunc(waitForAttachTimeout time.Duration, volumeToMount VolumeToMount, actualStateOfWorldMounterUpdater ActualStateOfWorldMounterUpdater, isRemount bool) (volumetypes.GeneratedOperations, error) // Generates the UnmountVolume function needed to perform the unmount of a volume plugin - GenerateUnmountVolumeFunc(volumeToUnmount MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater) (volumetypes.GeneratedOperations, error) + GenerateUnmountVolumeFunc(volumeToUnmount MountedVolume, actualStateOfWorld ActualStateOfWorldMounterUpdater, podsDir string) (volumetypes.GeneratedOperations, error) // Generates the AttachVolume function needed to perform attach of a volume plugin GenerateAttachVolumeFunc(volumeToAttach VolumeToAttach, actualStateOfWorld ActualStateOfWorldAttacherUpdater) (volumetypes.GeneratedOperations, error) @@ -651,7 +652,8 @@ func (og *operationGenerator) resizeFileSystem(volumeToMount VolumeToMount, devi func (og *operationGenerator) GenerateUnmountVolumeFunc( volumeToUnmount MountedVolume, - actualStateOfWorld ActualStateOfWorldMounterUpdater) (volumetypes.GeneratedOperations, error) { + actualStateOfWorld ActualStateOfWorldMounterUpdater, + podsDir string) (volumetypes.GeneratedOperations, error) { // Get mountable plugin volumePlugin, err := og.volumePluginMgr.FindPluginByName(volumeToUnmount.PluginName) @@ -666,6 +668,14 @@ func (og *operationGenerator) GenerateUnmountVolumeFunc( } unmountVolumeFunc := func() (error, error) { + mounter := og.volumePluginMgr.Host.GetMounter(volumeToUnmount.PluginName) + + // Remove all bind-mounts for subPaths + podDir := path.Join(podsDir, string(volumeToUnmount.PodUID)) + if err := mounter.CleanSubPaths(podDir, volumeToUnmount.OuterVolumeSpecName); err != nil { + return volumeToUnmount.GenerateError("error cleaning subPath mounts", err) + } + // Execute unmount unmountErr := volumeUnmounter.TearDown() if unmountErr != nil { From ba818d8dffd0f8c76471e05d931b20df6569fa2a Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Mon, 5 Mar 2018 09:14:44 +0100 Subject: [PATCH 2/3] Add subpath e2e tests --- test/e2e/framework/util.go | 8 +- test/e2e/storage/BUILD | 11 +- test/e2e/storage/subpath.go | 743 ++++++++++++++++++++++++++++++++++++ 3 files changed, 752 insertions(+), 10 deletions(-) create mode 100644 test/e2e/storage/subpath.go diff --git a/test/e2e/framework/util.go b/test/e2e/framework/util.go index a334a71397e1..964a9aeead81 100644 --- a/test/e2e/framework/util.go +++ b/test/e2e/framework/util.go @@ -1368,23 +1368,23 @@ func WaitForPodRunningInNamespace(c clientset.Interface, pod *v1.Pod) error { if pod.Status.Phase == v1.PodRunning { return nil } - return waitTimeoutForPodRunningInNamespace(c, pod.Name, pod.Namespace, PodStartTimeout) + return WaitTimeoutForPodRunningInNamespace(c, pod.Name, pod.Namespace, PodStartTimeout) } // Waits default amount of time (PodStartTimeout) for the specified pod to become running. // Returns an error if timeout occurs first, or pod goes in to failed state. func WaitForPodNameRunningInNamespace(c clientset.Interface, podName, namespace string) error { - return waitTimeoutForPodRunningInNamespace(c, podName, namespace, PodStartTimeout) + return WaitTimeoutForPodRunningInNamespace(c, podName, namespace, PodStartTimeout) } // Waits an extended amount of time (slowPodStartTimeout) for the specified pod to become running. // The resourceVersion is used when Watching object changes, it tells since when we care // about changes to the pod. Returns an error if timeout occurs first, or pod goes in to failed state. func waitForPodRunningInNamespaceSlow(c clientset.Interface, podName, namespace string) error { - return waitTimeoutForPodRunningInNamespace(c, podName, namespace, slowPodStartTimeout) + return WaitTimeoutForPodRunningInNamespace(c, podName, namespace, slowPodStartTimeout) } -func waitTimeoutForPodRunningInNamespace(c clientset.Interface, podName, namespace string, timeout time.Duration) error { +func WaitTimeoutForPodRunningInNamespace(c clientset.Interface, podName, namespace string, timeout time.Duration) error { return wait.PollImmediate(Poll, timeout, podRunning(c, podName, namespace)) } diff --git a/test/e2e/storage/BUILD b/test/e2e/storage/BUILD index 994c4aad7cf7..af103b9f043e 100644 --- a/test/e2e/storage/BUILD +++ b/test/e2e/storage/BUILD @@ -1,9 +1,4 @@ -package(default_visibility = ["//visibility:public"]) - -load( - "@io_bazel_rules_go//go:def.bzl", - "go_library", -) +load("@io_bazel_rules_go//go:def.bzl", "go_library") go_library( name = "go_default_library", @@ -21,6 +16,7 @@ go_library( "pv_protection.go", "pvc_protection.go", "regional_pd.go", + "subpath.go", "volume_expand.go", "volume_io.go", "volume_metrics.go", @@ -28,6 +24,7 @@ go_library( "volumes.go", ], importpath = "k8s.io/kubernetes/test/e2e/storage", + visibility = ["//visibility:public"], deps = [ "//pkg/api/testapi:go_default_library", "//pkg/api/v1/pod:go_default_library", @@ -70,6 +67,7 @@ go_library( "//vendor/k8s.io/apimachinery/pkg/types:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/errors:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/util/rand:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/uuid:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library", @@ -95,4 +93,5 @@ filegroup( "//test/e2e/storage/vsphere:all-srcs", ], tags = ["automanaged"], + visibility = ["//visibility:public"], ) diff --git a/test/e2e/storage/subpath.go b/test/e2e/storage/subpath.go new file mode 100644 index 000000000000..4a0aa1ff4ff2 --- /dev/null +++ b/test/e2e/storage/subpath.go @@ -0,0 +1,743 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package storage + +import ( + "fmt" + "path/filepath" + "strings" + + "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + utilerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/rand" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/kubernetes/test/e2e/framework" + "k8s.io/kubernetes/test/e2e/storage/utils" + imageutils "k8s.io/kubernetes/test/utils/image" + + "time" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" +) + +var ( + volumePath = "/test-volume" + volumeName = "test-volume" + fileName = "test-file" + retryDuration = 180 + mountImage = imageutils.GetE2EImage(imageutils.Mounttest) +) + +type volInfo struct { + source *v1.VolumeSource + node string +} + +type volSource interface { + createVolume(f *framework.Framework) volInfo + cleanupVolume(f *framework.Framework) +} + +var initVolSources = map[string]func() volSource{ + "hostPath": initHostpath, + "hostPathSymlink": initHostpathSymlink, + "emptyDir": initEmptydir, + "gcePD": initGCEPD, + "gcePDPartitioned": initGCEPDPartition, + "nfs": initNFS, + "nfsPVC": initNFSPVC, + "gluster": initGluster, +} + +var _ = utils.SIGDescribe("Subpath", func() { + var ( + subPath string + subPathDir string + filePathInSubpath string + filePathInVolume string + pod *v1.Pod + vol volSource + ) + + f := framework.NewDefaultFramework("subpath") + + for volType, volInit := range initVolSources { + curVolType := volType + curVolInit := volInit + + Context(fmt.Sprintf("[Volume type: %v]", curVolType), func() { + BeforeEach(func() { + By(fmt.Sprintf("Initializing %s volume", curVolType)) + vol = curVolInit() + subPath = f.Namespace.Name + subPathDir = filepath.Join(volumePath, subPath) + filePathInSubpath = filepath.Join(volumePath, fileName) + filePathInVolume = filepath.Join(subPathDir, fileName) + volInfo := vol.createVolume(f) + pod = testPodSubpath(subPath, curVolType, volInfo.source) + pod.Namespace = f.Namespace.Name + pod.Spec.NodeName = volInfo.node + }) + + AfterEach(func() { + By("Deleting pod") + err := framework.DeletePodWithWait(f, f.ClientSet, pod) + Expect(err).ToNot(HaveOccurred()) + + By("Cleaning up volume") + vol.cleanupVolume(f) + }) + + It("should support non-existent path", func() { + // Write the file in the subPath from container 0 + setWriteCommand(filePathInSubpath, &pod.Spec.Containers[0]) + + // Read it from outside the subPath from container 1 + testReadFile(f, filePathInVolume, pod, 1) + }) + + It("should support existing directory", func() { + // Create the directory + setInitCommand(pod, fmt.Sprintf("mkdir -p %s", subPathDir)) + + // Write the file in the subPath from container 0 + setWriteCommand(filePathInSubpath, &pod.Spec.Containers[0]) + + // Read it from outside the subPath from container 1 + testReadFile(f, filePathInVolume, pod, 1) + }) + + It("should support existing single file", func() { + // Create the file in the init container + setInitCommand(pod, fmt.Sprintf("mkdir -p %s; echo \"mount-tester new file\" > %s", subPathDir, filePathInVolume)) + + // Read it from inside the subPath from container 0 + testReadFile(f, filePathInSubpath, pod, 0) + }) + + It("should fail if subpath directory is outside the volume [Slow]", func() { + // Create the subpath outside the volume + setInitCommand(pod, fmt.Sprintf("ln -s /bin %s", subPathDir)) + + // Pod should fail + testPodFailSupath(f, pod) + }) + + It("should fail if subpath file is outside the volume [Slow]", func() { + // Create the subpath outside the volume + setInitCommand(pod, fmt.Sprintf("ln -s /bin/sh %s", subPathDir)) + + // Pod should fail + testPodFailSupath(f, pod) + }) + + It("should fail if non-existent subpath is outside the volume [Slow]", func() { + // Create the subpath outside the volume + setInitCommand(pod, fmt.Sprintf("ln -s /bin/notanexistingpath %s", subPathDir)) + + // Pod should fail + testPodFailSupath(f, pod) + }) + + It("should fail if subpath with backstepping is outside the volume [Slow]", func() { + // Create the subpath outside the volume + setInitCommand(pod, fmt.Sprintf("ln -s ../ %s", subPathDir)) + + // Pod should fail + testPodFailSupath(f, pod) + }) + + It("should support creating multiple subpath from same volumes [Slow]", func() { + subpathDir1 := filepath.Join(volumePath, "subpath1") + subpathDir2 := filepath.Join(volumePath, "subpath2") + filepath1 := filepath.Join("/test-subpath1", fileName) + filepath2 := filepath.Join("/test-subpath2", fileName) + setInitCommand(pod, fmt.Sprintf("mkdir -p %s; mkdir -p %s", subpathDir1, subpathDir2)) + + addSubpathVolumeContainer(&pod.Spec.Containers[0], v1.VolumeMount{ + Name: volumeName, + MountPath: "/test-subpath1", + SubPath: "subpath1", + }) + addSubpathVolumeContainer(&pod.Spec.Containers[0], v1.VolumeMount{ + Name: volumeName, + MountPath: "/test-subpath2", + SubPath: "subpath2", + }) + + addMultipleWrites(&pod.Spec.Containers[0], filepath1, filepath2) + testMultipleReads(f, pod, 0, filepath1, filepath2) + }) + + It("should support restarting containers [Slow]", func() { + // Create the directory + setInitCommand(pod, fmt.Sprintf("mkdir -p %v", subPathDir)) + + testPodContainerRestart(f, pod, filePathInVolume, filePathInSubpath) + }) + }) + } + + // TODO: add a test case for the same disk with two partitions +}) + +func testPodSubpath(subpath, volumeType string, source *v1.VolumeSource) *v1.Pod { + suffix := strings.ToLower(fmt.Sprintf("%s-%s", volumeType, rand.String(4))) + privileged := true + gracePeriod := int64(1) + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("pod-subpath-test-%s", suffix), + }, + Spec: v1.PodSpec{ + InitContainers: []v1.Container{ + { + Name: fmt.Sprintf("init-volume-%s", suffix), + Image: "busybox", + VolumeMounts: []v1.VolumeMount{ + { + Name: volumeName, + MountPath: volumePath, + }, + }, + SecurityContext: &v1.SecurityContext{ + Privileged: &privileged, + }, + }, + }, + Containers: []v1.Container{ + { + Name: fmt.Sprintf("test-container-subpath-%s", suffix), + Image: mountImage, + VolumeMounts: []v1.VolumeMount{ + { + Name: volumeName, + MountPath: volumePath, + SubPath: subpath, + }, + }, + SecurityContext: &v1.SecurityContext{ + Privileged: &privileged, + }, + }, + { + Name: fmt.Sprintf("test-container-volume-%s", suffix), + Image: mountImage, + VolumeMounts: []v1.VolumeMount{ + { + Name: volumeName, + MountPath: volumePath, + }, + }, + SecurityContext: &v1.SecurityContext{ + Privileged: &privileged, + }, + }, + }, + RestartPolicy: v1.RestartPolicyNever, + TerminationGracePeriodSeconds: &gracePeriod, + Volumes: []v1.Volume{ + { + Name: volumeName, + VolumeSource: *source, + }, + }, + }, + } +} + +func setInitCommand(pod *v1.Pod, command string) { + pod.Spec.InitContainers[0].Command = []string{"/bin/sh", "-ec", command} +} + +func setWriteCommand(file string, container *v1.Container) { + container.Args = []string{ + fmt.Sprintf("--new_file_0644=%v", file), + fmt.Sprintf("--file_mode=%v", file), + } +} + +func addSubpathVolumeContainer(container *v1.Container, volumeMount v1.VolumeMount) { + existingMounts := container.VolumeMounts + container.VolumeMounts = append(existingMounts, volumeMount) +} + +func addMultipleWrites(container *v1.Container, file1 string, file2 string) { + container.Args = []string{ + fmt.Sprintf("--new_file_0644=%v", file1), + fmt.Sprintf("--new_file_0666=%v", file2), + } +} + +func testMultipleReads(f *framework.Framework, pod *v1.Pod, containerIndex int, file1 string, file2 string) { + By(fmt.Sprintf("Creating pod %s", pod.Name)) + f.TestContainerOutput("multi_subpath", pod, containerIndex, []string{ + "content of file \"" + file1 + "\": mount-tester new file", + "content of file \"" + file2 + "\": mount-tester new file", + }) +} + +func setReadCommand(file string, container *v1.Container) { + container.Args = []string{ + fmt.Sprintf("--file_content_in_loop=%v", file), + fmt.Sprintf("--retry_time=%d", retryDuration), + } +} + +func testReadFile(f *framework.Framework, file string, pod *v1.Pod, containerIndex int) { + setReadCommand(file, &pod.Spec.Containers[containerIndex]) + + By(fmt.Sprintf("Creating pod %s", pod.Name)) + f.TestContainerOutput("subpath", pod, containerIndex, []string{ + "content of file \"" + file + "\": mount-tester new file", + }) +} + +func testPodFailSupath(f *framework.Framework, pod *v1.Pod) { + By(fmt.Sprintf("Creating pod %s", pod.Name)) + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(pod) + Expect(err).ToNot(HaveOccurred()) + defer func() { + framework.DeletePodWithWait(f, f.ClientSet, pod) + }() + err = framework.WaitTimeoutForPodRunningInNamespace(f.ClientSet, pod.Name, pod.Namespace, time.Minute) + Expect(err).To(HaveOccurred()) + + By("Checking for subpath error event") + selector := fields.Set{ + "involvedObject.kind": "Pod", + "involvedObject.name": pod.Name, + "involvedObject.namespace": f.Namespace.Name, + "reason": "Failed", + }.AsSelector().String() + options := metav1.ListOptions{FieldSelector: selector} + events, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(options) + Expect(err).NotTo(HaveOccurred()) + Expect(len(events.Items)).NotTo(Equal(0)) + Expect(events.Items[0].Message).To(ContainSubstring("subPath")) +} + +func testPodContainerRestart(f *framework.Framework, pod *v1.Pod, fileInVolume, fileInSubpath string) { + pod.Spec.RestartPolicy = v1.RestartPolicyOnFailure + + // Add liveness probe to subpath container + pod.Spec.Containers[0].Image = "busybox" + pod.Spec.Containers[0].Command = []string{"/bin/sh", "-ec", "sleep 100000"} + pod.Spec.Containers[0].LivenessProbe = &v1.Probe{ + Handler: v1.Handler{ + Exec: &v1.ExecAction{ + Command: []string{"cat", fileInSubpath}, + }, + }, + InitialDelaySeconds: 15, + FailureThreshold: 1, + PeriodSeconds: 2, + } + + // Set volume container to write file + writeCmd := fmt.Sprintf("echo test > %v", fileInVolume) + pod.Spec.Containers[1].Image = "busybox" + pod.Spec.Containers[1].Command = []string{"/bin/sh", "-ec", fmt.Sprintf("%v; sleep 100000", writeCmd)} + + // Start pod + By(fmt.Sprintf("Creating pod %s", pod.Name)) + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(pod) + Expect(err).ToNot(HaveOccurred()) + + err = framework.WaitTimeoutForPodRunningInNamespace(f.ClientSet, pod.Name, pod.Namespace, time.Minute) + Expect(err).ToNot(HaveOccurred()) + + By("Failing liveness probe") + out, err := podContainerExec(pod, 1, fmt.Sprintf("rm %v", fileInVolume)) + framework.Logf("Pod exec output: %v", out) + Expect(err).ToNot(HaveOccurred()) + + // Check that container has restarted + By("Waiting for container to restart") + restarts := int32(0) + err = wait.PollImmediate(10*time.Second, 2*time.Minute, func() (bool, error) { + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(pod.Name, metav1.GetOptions{}) + if err != nil { + return false, err + } + for _, status := range pod.Status.ContainerStatuses { + if status.Name == pod.Spec.Containers[0].Name { + framework.Logf("Container %v, restarts: %v", status.Name, status.RestartCount) + restarts = status.RestartCount + if restarts > 0 { + framework.Logf("Container has restart count: %v", restarts) + return true, nil + } + } + } + return false, nil + }) + Expect(err).ToNot(HaveOccurred()) + + // Fix liveness probe + By("Rewriting the file") + writeCmd = fmt.Sprintf("echo test-after > %v", fileInVolume) + out, err = podContainerExec(pod, 1, writeCmd) + framework.Logf("Pod exec output: %v", out) + Expect(err).ToNot(HaveOccurred()) + + // Wait for container restarts to stabilize + By("Waiting for container to stop restarting") + stableCount := int(0) + stableThreshold := int(time.Minute / framework.Poll) + err = wait.PollImmediate(framework.Poll, 2*time.Minute, func() (bool, error) { + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(pod.Name, metav1.GetOptions{}) + if err != nil { + return false, err + } + for _, status := range pod.Status.ContainerStatuses { + if status.Name == pod.Spec.Containers[0].Name { + if status.RestartCount == restarts { + stableCount++ + if stableCount > stableThreshold { + framework.Logf("Container restart has stabilized") + return true, nil + } + } else { + restarts = status.RestartCount + stableCount = 0 + framework.Logf("Container has restart count: %v", restarts) + } + break + } + } + return false, nil + }) + Expect(err).ToNot(HaveOccurred()) + + // Verify content of file in subpath + out, err = podContainerExec(pod, 0, fmt.Sprintf("cat %v", fileInSubpath)) + framework.Logf("Pod exec output: %v", out) + Expect(err).ToNot(HaveOccurred()) + Expect(strings.TrimSpace(out)).To(BeEquivalentTo("test-after")) +} + +func podContainerExec(pod *v1.Pod, containerIndex int, bashExec string) (string, error) { + return framework.RunKubectl("exec", fmt.Sprintf("--namespace=%s", pod.Namespace), pod.Name, "--container", pod.Spec.Containers[containerIndex].Name, "--", "/bin/sh", "-c", bashExec) +} + +type hostpathSource struct { +} + +func initHostpath() volSource { + return &hostpathSource{} +} + +func (s *hostpathSource) createVolume(f *framework.Framework) volInfo { + return volInfo{ + source: &v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/tmp", + }, + }, + } +} + +func (s *hostpathSource) cleanupVolume(f *framework.Framework) { +} + +type hostpathSymlinkSource struct { +} + +func initHostpathSymlink() volSource { + return &hostpathSymlinkSource{} +} + +func (s *hostpathSymlinkSource) createVolume(f *framework.Framework) volInfo { + nodes := framework.GetReadySchedulableNodesOrDie(f.ClientSet) + Expect(len(nodes.Items)).NotTo(BeZero(), "No available nodes for scheduling") + + node0 := &nodes.Items[0] + sourcePath := fmt.Sprintf("/tmp/%v", f.Namespace.Name) + targetPath := fmt.Sprintf("/tmp/%v-link", f.Namespace.Name) + cmd := fmt.Sprintf("mkdir %v -m 777 && ln -s %v %v", sourcePath, sourcePath, targetPath) + privileged := true + + // Launch pod to initialize hostpath directory and symlink + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("hostpath-symlink-prep-%s", f.Namespace.Name), + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: fmt.Sprintf("init-volume-%s", f.Namespace.Name), + Image: "busybox", + Command: []string{"/bin/sh", "-ec", cmd}, + VolumeMounts: []v1.VolumeMount{ + { + Name: volumeName, + MountPath: "/tmp", + }, + }, + SecurityContext: &v1.SecurityContext{ + Privileged: &privileged, + }, + }, + }, + RestartPolicy: v1.RestartPolicyNever, + Volumes: []v1.Volume{ + { + Name: volumeName, + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/tmp", + }, + }, + }, + }, + NodeName: node0.Name, + }, + } + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(pod) + Expect(err).ToNot(HaveOccurred()) + + err = framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, pod.Namespace) + Expect(err).ToNot(HaveOccurred()) + + err = framework.DeletePodWithWait(f, f.ClientSet, pod) + Expect(err).ToNot(HaveOccurred()) + + return volInfo{ + source: &v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: targetPath, + }, + }, + node: node0.Name, + } +} + +func (s *hostpathSymlinkSource) cleanupVolume(f *framework.Framework) { +} + +type emptydirSource struct { +} + +func initEmptydir() volSource { + return &emptydirSource{} +} + +func (s *emptydirSource) createVolume(f *framework.Framework) volInfo { + return volInfo{ + source: &v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }, + } +} + +func (s *emptydirSource) cleanupVolume(f *framework.Framework) { +} + +type gcepdSource struct { + diskName string +} + +func initGCEPD() volSource { + framework.SkipUnlessProviderIs("gce", "gke") + return &gcepdSource{} +} + +func (s *gcepdSource) createVolume(f *framework.Framework) volInfo { + var err error + + framework.Logf("Creating GCE PD volume") + s.diskName, err = framework.CreatePDWithRetry() + framework.ExpectNoError(err, "Error creating PD") + + return volInfo{ + source: &v1.VolumeSource{ + GCEPersistentDisk: &v1.GCEPersistentDiskVolumeSource{PDName: s.diskName}, + }, + } +} + +func (s *gcepdSource) cleanupVolume(f *framework.Framework) { + if s.diskName != "" { + err := framework.DeletePDWithRetry(s.diskName) + framework.ExpectNoError(err, "Error deleting PD") + } +} + +type gcepdPartitionSource struct { + diskName string +} + +func initGCEPDPartition() volSource { + // Need to manually create, attach, partition, detach the GCE PD + // with disk name "subpath-partitioned-disk" before running this test + manual := true + if manual { + framework.Skipf("Skipping manual GCE PD partition test") + } + framework.SkipUnlessProviderIs("gce", "gke") + return &gcepdPartitionSource{diskName: "subpath-partitioned-disk"} +} + +func (s *gcepdPartitionSource) createVolume(f *framework.Framework) volInfo { + // TODO: automate partitioned of GCE PD once it supports raw block volumes + // framework.Logf("Creating GCE PD volume") + // s.diskName, err = framework.CreatePDWithRetry() + // framework.ExpectNoError(err, "Error creating PD") + + return volInfo{ + source: &v1.VolumeSource{ + GCEPersistentDisk: &v1.GCEPersistentDiskVolumeSource{ + PDName: s.diskName, + Partition: 1, + }, + }, + } +} + +func (s *gcepdPartitionSource) cleanupVolume(f *framework.Framework) { + if s.diskName != "" { + // err := framework.DeletePDWithRetry(s.diskName) + // framework.ExpectNoError(err, "Error deleting PD") + } +} + +type nfsSource struct { + serverPod *v1.Pod +} + +func initNFS() volSource { + return &nfsSource{} +} + +func (s *nfsSource) createVolume(f *framework.Framework) volInfo { + var serverIP string + + framework.Logf("Creating NFS server") + _, s.serverPod, serverIP = framework.NewNFSServer(f.ClientSet, f.Namespace.Name, []string{"-G", "777", "/exports"}) + + return volInfo{ + source: &v1.VolumeSource{ + NFS: &v1.NFSVolumeSource{ + Server: serverIP, + Path: "/exports", + }, + }, + } +} + +func (s *nfsSource) cleanupVolume(f *framework.Framework) { + if s.serverPod != nil { + framework.DeletePodWithWait(f, f.ClientSet, s.serverPod) + } +} + +type glusterSource struct { + serverPod *v1.Pod +} + +func initGluster() volSource { + framework.SkipUnlessNodeOSDistroIs("gci", "ubuntu") + return &glusterSource{} +} + +func (s *glusterSource) createVolume(f *framework.Framework) volInfo { + framework.Logf("Creating GlusterFS server") + _, s.serverPod, _ = framework.NewGlusterfsServer(f.ClientSet, f.Namespace.Name) + + return volInfo{ + source: &v1.VolumeSource{ + Glusterfs: &v1.GlusterfsVolumeSource{ + EndpointsName: "gluster-server", + Path: "test_vol", + }, + }, + } +} + +func (s *glusterSource) cleanupVolume(f *framework.Framework) { + if s.serverPod != nil { + framework.DeletePodWithWait(f, f.ClientSet, s.serverPod) + err := f.ClientSet.CoreV1().Endpoints(f.Namespace.Name).Delete("gluster-server", nil) + Expect(err).NotTo(HaveOccurred(), "Gluster delete endpoints failed") + } +} + +// TODO: need a better way to wrap PVC. A generic framework should support both static and dynamic PV. +// For static PV, can reuse createVolume methods for inline volumes +type nfsPVCSource struct { + serverPod *v1.Pod + pvc *v1.PersistentVolumeClaim + pv *v1.PersistentVolume +} + +func initNFSPVC() volSource { + return &nfsPVCSource{} +} + +func (s *nfsPVCSource) createVolume(f *framework.Framework) volInfo { + var serverIP string + + framework.Logf("Creating NFS server") + _, s.serverPod, serverIP = framework.NewNFSServer(f.ClientSet, f.Namespace.Name, []string{"-G", "777", "/exports"}) + + pvConfig := framework.PersistentVolumeConfig{ + NamePrefix: "nfs-", + StorageClassName: f.Namespace.Name, + PVSource: v1.PersistentVolumeSource{ + NFS: &v1.NFSVolumeSource{ + Server: serverIP, + Path: "/exports", + }, + }, + } + pvcConfig := framework.PersistentVolumeClaimConfig{ + StorageClassName: &f.Namespace.Name, + } + + framework.Logf("Creating PVC and PV") + pv, pvc, err := framework.CreatePVCPV(f.ClientSet, pvConfig, pvcConfig, f.Namespace.Name, false) + Expect(err).NotTo(HaveOccurred(), "PVC, PV creation failed") + + err = framework.WaitOnPVandPVC(f.ClientSet, f.Namespace.Name, pv, pvc) + Expect(err).NotTo(HaveOccurred(), "PVC, PV failed to bind") + + s.pvc = pvc + s.pv = pv + + return volInfo{ + source: &v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: pvc.Name, + }, + }, + } +} + +func (s *nfsPVCSource) cleanupVolume(f *framework.Framework) { + if s.pvc != nil || s.pv != nil { + if errs := framework.PVPVCCleanup(f.ClientSet, f.Namespace.Name, s.pv, s.pvc); len(errs) != 0 { + framework.Failf("Failed to delete PVC or PV: %v", utilerrors.NewAggregate(errs)) + } + } + if s.serverPod != nil { + framework.DeletePodWithWait(f, f.ClientSet, s.serverPod) + } +} From f6d97b5d2b139c593991a50064e3aa352600ca39 Mon Sep 17 00:00:00 2001 From: Michelle Au Date: Mon, 5 Mar 2018 09:14:44 +0100 Subject: [PATCH 3/3] Add feature gate for subpath --- pkg/apis/core/validation/validation.go | 6 +- pkg/apis/core/validation/validation_test.go | 62 +++++++++++++++++++++ pkg/features/kube_features.go | 8 +++ pkg/kubelet/kubelet_pods.go | 4 ++ pkg/kubelet/kubelet_pods_test.go | 56 +++++++++++++++++++ 5 files changed, 135 insertions(+), 1 deletion(-) diff --git a/pkg/apis/core/validation/validation.go b/pkg/apis/core/validation/validation.go index b534b89f441c..5b54d6fe642c 100644 --- a/pkg/apis/core/validation/validation.go +++ b/pkg/apis/core/validation/validation.go @@ -2264,7 +2264,11 @@ func ValidateVolumeMounts(mounts []core.VolumeMount, voldevices map[string]strin } if len(mnt.SubPath) > 0 { - allErrs = append(allErrs, validateLocalDescendingPath(mnt.SubPath, fldPath.Child("subPath"))...) + if !utilfeature.DefaultFeatureGate.Enabled(features.VolumeSubpath) { + allErrs = append(allErrs, field.Forbidden(fldPath.Child("subPath"), "subPath is disabled by feature-gate")) + } else { + allErrs = append(allErrs, validateLocalDescendingPath(mnt.SubPath, fldPath.Child("subPath"))...) + } } if mnt.MountPropagation != nil { diff --git a/pkg/apis/core/validation/validation_test.go b/pkg/apis/core/validation/validation_test.go index ce19b450e612..a48c04f7c52b 100644 --- a/pkg/apis/core/validation/validation_test.go +++ b/pkg/apis/core/validation/validation_test.go @@ -4766,6 +4766,68 @@ func TestValidateVolumeMounts(t *testing.T) { } } +func TestValidateDisabledSubpath(t *testing.T) { + utilfeature.DefaultFeatureGate.Set("VolumeSubpath=false") + defer utilfeature.DefaultFeatureGate.Set("VolumeSubpath=true") + + volumes := []core.Volume{ + {Name: "abc", VolumeSource: core.VolumeSource{PersistentVolumeClaim: &core.PersistentVolumeClaimVolumeSource{ClaimName: "testclaim1"}}}, + {Name: "abc-123", VolumeSource: core.VolumeSource{PersistentVolumeClaim: &core.PersistentVolumeClaimVolumeSource{ClaimName: "testclaim2"}}}, + {Name: "123", VolumeSource: core.VolumeSource{HostPath: &core.HostPathVolumeSource{Path: "/foo/baz", Type: newHostPathType(string(core.HostPathUnset))}}}, + } + vols, v1err := ValidateVolumes(volumes, field.NewPath("field")) + if len(v1err) > 0 { + t.Errorf("Invalid test volume - expected success %v", v1err) + return + } + + container := core.Container{ + SecurityContext: nil, + } + + goodVolumeDevices := []core.VolumeDevice{ + {Name: "xyz", DevicePath: "/foofoo"}, + {Name: "uvw", DevicePath: "/foofoo/share/test"}, + } + + cases := map[string]struct { + mounts []core.VolumeMount + expectError bool + }{ + "subpath not specified": { + []core.VolumeMount{ + { + Name: "abc-123", + MountPath: "/bab", + }, + }, + false, + }, + "subpath specified": { + []core.VolumeMount{ + { + Name: "abc-123", + MountPath: "/bab", + SubPath: "baz", + }, + }, + true, + }, + } + + for name, test := range cases { + errs := ValidateVolumeMounts(test.mounts, GetVolumeDeviceMap(goodVolumeDevices), vols, &container, field.NewPath("field")) + + if len(errs) != 0 && !test.expectError { + t.Errorf("test %v failed: %+v", name, errs) + } + + if len(errs) == 0 && test.expectError { + t.Errorf("test %v failed, expected error", name) + } + } +} + func TestValidateMountPropagation(t *testing.T) { bTrue := true bFalse := false diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index 10a58493dfdb..4ec194d55ee5 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -267,6 +267,13 @@ const ( // // Enables control over the primary group ID of containers' init processes. RunAsGroup utilfeature.Feature = "RunAsGroup" + + // owner: @saad-ali + // ga + // + // Allow mounting a subpath of a volume in a container + // Do not remove this feature gate even though it's GA + VolumeSubpath utilfeature.Feature = "VolumeSubpath" ) func init() { @@ -313,6 +320,7 @@ var defaultKubernetesFeatureGates = map[utilfeature.Feature]utilfeature.FeatureS CRIContainerLogRotation: {Default: false, PreRelease: utilfeature.Alpha}, GCERegionalPersistentDisk: {Default: true, PreRelease: utilfeature.Beta}, RunAsGroup: {Default: false, PreRelease: utilfeature.Alpha}, + VolumeSubpath: {Default: true, PreRelease: utilfeature.GA}, // inherited features from generic apiserver, relisted here to get a conflict if it is changed // unintentionally on either side: diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go index 6698a5df87a2..2712448cf816 100644 --- a/pkg/kubelet/kubelet_pods.go +++ b/pkg/kubelet/kubelet_pods.go @@ -194,6 +194,10 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h return nil, cleanupAction, err } if mount.SubPath != "" { + if !utilfeature.DefaultFeatureGate.Enabled(features.VolumeSubpath) { + return nil, cleanupAction, fmt.Errorf("volume subpaths are disabled") + } + if filepath.IsAbs(mount.SubPath) { return nil, cleanupAction, fmt.Errorf("error SubPath `%s` must not be an absolute path", mount.SubPath) } diff --git a/pkg/kubelet/kubelet_pods_test.go b/pkg/kubelet/kubelet_pods_test.go index ee00373bf318..92552f5227bb 100644 --- a/pkg/kubelet/kubelet_pods_test.go +++ b/pkg/kubelet/kubelet_pods_test.go @@ -355,6 +355,62 @@ func TestMakeMounts(t *testing.T) { } } +func TestDisabledSubpath(t *testing.T) { + fm := &mount.FakeMounter{} + pod := v1.Pod{ + Spec: v1.PodSpec{ + HostNetwork: true, + }, + } + podVolumes := kubecontainer.VolumeMap{ + "disk": kubecontainer.VolumeInfo{Mounter: &stubVolume{path: "/mnt/disk"}}, + } + + cases := map[string]struct { + container v1.Container + expectError bool + }{ + "subpath not specified": { + v1.Container{ + VolumeMounts: []v1.VolumeMount{ + { + MountPath: "/mnt/path3", + Name: "disk", + ReadOnly: true, + }, + }, + }, + false, + }, + "subpath specified": { + v1.Container{ + VolumeMounts: []v1.VolumeMount{ + { + MountPath: "/mnt/path3", + SubPath: "/must/not/be/absolute", + Name: "disk", + ReadOnly: true, + }, + }, + }, + true, + }, + } + + utilfeature.DefaultFeatureGate.Set("VolumeSubpath=false") + defer utilfeature.DefaultFeatureGate.Set("VolumeSubpath=true") + + for name, test := range cases { + _, _, err := makeMounts(&pod, "/pod", &test.container, "fakepodname", "", "", podVolumes, fm) + if err != nil && !test.expectError { + t.Errorf("test %v failed: %v", name, err) + } + if err == nil && test.expectError { + t.Errorf("test %v failed: expected error", name) + } + } +} + func TestMakeBlockVolumes(t *testing.T) { testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */) defer testKubelet.Cleanup()