Skip to content

Commit

Permalink
Merge pull request #63143 from jsafrane/containerized-subpath
Browse files Browse the repository at this point in the history
Automatic merge from submit-queue (batch tested with PRs 63348, 63839, 63143, 64447, 64567). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Containerized subpath

**What this PR does / why we need it**:
Containerized kubelet needs a different implementation of `PrepareSafeSubpath` than kubelet running directly on the host.

On the host we safely open the subpath and then bind-mount `/proc/<pidof kubelet>/fd/<descriptor of opened subpath>`.

With kubelet running in a container, `/proc/xxx/fd/yy` on the host contains a path that works only inside the container, i.e. `/rootfs/path/to/subpath`, and thus any bind-mount on the host fails.

Solution:
- safely open the subpath and get its device ID and inode number
- blindly bind-mount the subpath to `/var/lib/kubelet/pods/<uid>/volume-subpaths/<name of container>/<id of mount>`. This is potentially unsafe, because a user can change the subpath source to a link to a bad place (say `/run/docker.sock`) just before the bind-mount.
- get device ID and inode number of the destination. Typical users can't modify this file, as it lies on /var/lib/kubelet on the host.
- compare these device IDs and inode numbers.

**Which issue(s) this PR fixes**
Fixes #61456

**Special notes for your reviewer**:

The PR contains some refactoring of `doBindSubPath` to extract the common code. New `doNsEnterBindSubPath` is added for the nsenter related parts.

**Release note**:

```release-note
NONE
```
  • Loading branch information
Kubernetes Submit Queue committed Jun 1, 2018
2 parents 5710943 + cb5eb25 commit d2495b8
Show file tree
Hide file tree
Showing 25 changed files with 1,493 additions and 305 deletions.
2 changes: 2 additions & 0 deletions cmd/kubelet/app/BUILD
Expand Up @@ -110,6 +110,7 @@ go_library(
"//pkg/util/io:go_default_library",
"//pkg/util/mount:go_default_library",
"//pkg/util/node:go_default_library",
"//pkg/util/nsenter:go_default_library",
"//pkg/util/oom:go_default_library",
"//pkg/util/rlimit:go_default_library",
"//pkg/version:go_default_library",
Expand Down Expand Up @@ -170,6 +171,7 @@ go_library(
"//vendor/k8s.io/client-go/tools/record:go_default_library",
"//vendor/k8s.io/client-go/util/cert:go_default_library",
"//vendor/k8s.io/client-go/util/certificate:go_default_library",
"//vendor/k8s.io/utils/exec:go_default_library",
] + select({
"@io_bazel_rules_go//go/platform:linux": [
"//vendor/golang.org/x/exp/inotify:go_default_library",
Expand Down
7 changes: 5 additions & 2 deletions cmd/kubelet/app/server.go
Expand Up @@ -91,10 +91,12 @@ import (
kubeio "k8s.io/kubernetes/pkg/util/io"
"k8s.io/kubernetes/pkg/util/mount"
nodeutil "k8s.io/kubernetes/pkg/util/node"
"k8s.io/kubernetes/pkg/util/nsenter"
"k8s.io/kubernetes/pkg/util/oom"
"k8s.io/kubernetes/pkg/util/rlimit"
"k8s.io/kubernetes/pkg/version"
"k8s.io/kubernetes/pkg/version/verflag"
"k8s.io/utils/exec"
)

const (
Expand Down Expand Up @@ -361,11 +363,12 @@ func UnsecuredDependencies(s *options.KubeletServer) (*kubelet.Dependencies, err
var writer kubeio.Writer = &kubeio.StdWriter{}
if s.Containerized {
glog.V(2).Info("Running kubelet in containerized mode")
mounter, err = mount.NewNsenterMounter()
ne, err := nsenter.NewNsenter(nsenter.DefaultHostRootFsPath, exec.New())
if err != nil {
return nil, err
}
writer = &kubeio.NsenterWriter{}
mounter = mount.NewNsenterMounter(s.RootDirectory, ne)
writer = kubeio.NewNsenterWriter(ne)
}

var dockerClientConfig *dockershim.ClientConfig
Expand Down
8 changes: 6 additions & 2 deletions pkg/kubelet/cm/container_manager_linux_test.go
Expand Up @@ -92,8 +92,8 @@ func (mi *fakeMountInterface) MakeFile(pathname string) error {
return nil
}

func (mi *fakeMountInterface) ExistsPath(pathname string) bool {
return true
func (mi *fakeMountInterface) ExistsPath(pathname string) (bool, error) {
return true, errors.New("not implemented")
}

func (mi *fakeMountInterface) PrepareSafeSubpath(subPath mount.Subpath) (newHostPath string, cleanupAction func(), err error) {
Expand All @@ -120,6 +120,10 @@ func (mi *fakeMountInterface) GetSELinuxSupport(pathname string) (bool, error) {
return false, errors.New("not implemented")
}

// GetMode is a stub on this test fake: for every pathname it returns a zero
// mode together with a "not implemented" error.
func (mi *fakeMountInterface) GetMode(pathname string) (os.FileMode, error) {
return 0, errors.New("not implemented")
}

func fakeContainerMgrMountInt() mount.Interface {
return &fakeMountInterface{
[]mount.MountPoint{
Expand Down
23 changes: 9 additions & 14 deletions pkg/kubelet/kubelet_pods.go
Expand Up @@ -58,7 +58,6 @@ import (
"k8s.io/kubernetes/pkg/kubelet/status"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/util/format"
utilfile "k8s.io/kubernetes/pkg/util/file"
mountutil "k8s.io/kubernetes/pkg/util/mount"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
"k8s.io/kubernetes/pkg/volume/util/volumepathhandler"
Expand Down Expand Up @@ -179,30 +178,26 @@ func makeMounts(pod *v1.Pod, podDir string, container *v1.Container, hostName, h
return nil, cleanupAction, fmt.Errorf("unable to provision SubPath `%s`: %v", mount.SubPath, err)
}

fileinfo, err := os.Lstat(hostPath)
if err != nil {
return nil, cleanupAction, err
}
perm := fileinfo.Mode()

volumePath, err := filepath.EvalSymlinks(hostPath)
if err != nil {
return nil, cleanupAction, err
}
volumePath := hostPath
hostPath = filepath.Join(volumePath, mount.SubPath)

if subPathExists, err := utilfile.FileOrSymlinkExists(hostPath); err != nil {
if subPathExists, err := mounter.ExistsPath(hostPath); err != nil {
glog.Errorf("Could not determine if subPath %s exists; will not attempt to change its permissions", hostPath)
} else if !subPathExists {
// Create the sub path now because if it's auto-created later when referenced, it may have an
// incorrect ownership and mode. For example, the sub path directory must have at least g+rwx
// when the pod specifies an fsGroup, and if the directory is not created here, Docker will
// later auto-create it with the incorrect mode 0750
// Make extra care not to escape the volume!
if err := mounter.SafeMakeDir(hostPath, volumePath, perm); err != nil {
glog.Errorf("failed to mkdir %q: %v", hostPath, err)
perm, err := mounter.GetMode(volumePath)
if err != nil {
return nil, cleanupAction, err
}
if err := mounter.SafeMakeDir(mount.SubPath, volumePath, perm); err != nil {
// Don't pass detailed error back to the user because it could give information about host filesystem
glog.Errorf("failed to create subPath directory for volumeMount %q of container %q: %v", mount.Name, container.Name, err)
return nil, cleanupAction, fmt.Errorf("failed to create subPath directory for volumeMount %q of container %q", mount.Name, container.Name)
}
}
hostPath, cleanupAction, err = mounter.PrepareSafeSubpath(mountutil.Subpath{
VolumeMountIndex: i,
Expand Down
20 changes: 13 additions & 7 deletions pkg/util/io/writer.go
Expand Up @@ -50,18 +50,24 @@ func (writer *StdWriter) WriteFile(filename string, data []byte, perm os.FileMod
// it will not see the mounted device in its own namespace. To work around this
// limitation one has to first enter hosts namespace (by using 'nsenter') and
// only then write data.
type NsenterWriter struct{}
type NsenterWriter struct {
ne *nsenter.Nsenter
}

// NewNsenterWriter creates a new Writer that allows writing data to file using
// nsenter command. The given ne is stored in the writer and is what WriteFile
// uses to execute its commands.
func NewNsenterWriter(ne *nsenter.Nsenter) *NsenterWriter {
return &NsenterWriter{
ne: ne,
}
}

// WriteFile calls 'nsenter cat - > <the file>' and 'nsenter chmod' to create a
// file on the host.
func (writer *NsenterWriter) WriteFile(filename string, data []byte, perm os.FileMode) error {
ne, err := nsenter.NewNsenter()
if err != nil {
return err
}
echoArgs := []string{"-c", fmt.Sprintf("cat > %s", filename)}
glog.V(5).Infof("nsenter: write data to file %s by nsenter", filename)
command := ne.Exec("sh", echoArgs)
command := writer.ne.Exec("sh", echoArgs)
command.SetStdin(bytes.NewBuffer(data))
outputBytes, err := command.CombinedOutput()
if err != nil {
Expand All @@ -71,7 +77,7 @@ func (writer *NsenterWriter) WriteFile(filename string, data []byte, perm os.Fil

chmodArgs := []string{fmt.Sprintf("%o", perm), filename}
glog.V(5).Infof("nsenter: change permissions of file %s to %s", filename, chmodArgs[0])
outputBytes, err = ne.Exec("chmod", chmodArgs).CombinedOutput()
outputBytes, err = writer.ne.Exec("chmod", chmodArgs).CombinedOutput()
if err != nil {
glog.Errorf("Output from chmod command: %v", string(outputBytes))
return err
Expand Down
34 changes: 34 additions & 0 deletions pkg/util/mount/BUILD
Expand Up @@ -71,12 +71,44 @@ go_library(
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/utils/exec:go_default_library",
] + select({
"@io_bazel_rules_go//go/platform:android": [
"//pkg/util/nsenter:go_default_library",
],
"@io_bazel_rules_go//go/platform:darwin": [
"//pkg/util/nsenter:go_default_library",
],
"@io_bazel_rules_go//go/platform:dragonfly": [
"//pkg/util/nsenter:go_default_library",
],
"@io_bazel_rules_go//go/platform:freebsd": [
"//pkg/util/nsenter:go_default_library",
],
"@io_bazel_rules_go//go/platform:linux": [
"//pkg/util/file:go_default_library",
"//pkg/util/io:go_default_library",
"//pkg/util/nsenter:go_default_library",
"//vendor/golang.org/x/sys/unix:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
],
"@io_bazel_rules_go//go/platform:nacl": [
"//pkg/util/nsenter:go_default_library",
],
"@io_bazel_rules_go//go/platform:netbsd": [
"//pkg/util/nsenter:go_default_library",
],
"@io_bazel_rules_go//go/platform:openbsd": [
"//pkg/util/nsenter:go_default_library",
],
"@io_bazel_rules_go//go/platform:plan9": [
"//pkg/util/nsenter:go_default_library",
],
"@io_bazel_rules_go//go/platform:solaris": [
"//pkg/util/nsenter:go_default_library",
],
"@io_bazel_rules_go//go/platform:windows": [
"//pkg/util/file:go_default_library",
"//pkg/util/nsenter:go_default_library",
],
"//conditions:default": [],
}),
)
Expand All @@ -101,7 +133,9 @@ go_test(
"//vendor/k8s.io/utils/exec/testing:go_default_library",
] + select({
"@io_bazel_rules_go//go/platform:linux": [
"//pkg/util/nsenter:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/golang.org/x/sys/unix:go_default_library",
"//vendor/k8s.io/utils/exec:go_default_library",
],
"@io_bazel_rules_go//go/platform:windows": [
Expand Down
6 changes: 5 additions & 1 deletion pkg/util/mount/exec_mount.go
Expand Up @@ -136,7 +136,7 @@ func (m *execMounter) MakeDir(pathname string) error {
return m.wrappedMounter.MakeDir(pathname)
}

func (m *execMounter) ExistsPath(pathname string) bool {
func (m *execMounter) ExistsPath(pathname string) (bool, error) {
return m.wrappedMounter.ExistsPath(pathname)
}

Expand All @@ -163,3 +163,7 @@ func (m *execMounter) GetFSGroup(pathname string) (int64, error) {
// GetSELinuxSupport forwards the call unchanged to the wrapped mounter.
func (m *execMounter) GetSELinuxSupport(pathname string) (bool, error) {
return m.wrappedMounter.GetSELinuxSupport(pathname)
}

// GetMode forwards the call unchanged to the wrapped mounter and returns
// whatever mode and error it reports.
func (m *execMounter) GetMode(pathname string) (os.FileMode, error) {
return m.wrappedMounter.GetMode(pathname)
}
8 changes: 6 additions & 2 deletions pkg/util/mount/exec_mount_test.go
Expand Up @@ -147,8 +147,8 @@ func (fm *fakeMounter) MakeFile(pathname string) error {
// MakeDir is a no-op on this test fake; it ignores pathname and always
// reports success.
func (fm *fakeMounter) MakeDir(pathname string) error {
return nil
}
func (fm *fakeMounter) ExistsPath(pathname string) bool {
return false
func (fm *fakeMounter) ExistsPath(pathname string) (bool, error) {
return false, errors.New("not implemented")
}
func (fm *fakeMounter) GetFileType(pathname string) (FileType, error) {
return FileTypeFile, nil
Expand Down Expand Up @@ -176,3 +176,7 @@ func (fm *fakeMounter) GetFSGroup(pathname string) (int64, error) {
// GetSELinuxSupport is a stub on this test fake: it always returns false
// together with a "not implemented" error.
func (fm *fakeMounter) GetSELinuxSupport(pathname string) (bool, error) {
return false, errors.New("not implemented")
}

// GetMode is a stub on this test fake: it always returns a zero mode
// together with a "not implemented" error.
func (fm *fakeMounter) GetMode(pathname string) (os.FileMode, error) {
return 0, errors.New("not implemented")
}
8 changes: 6 additions & 2 deletions pkg/util/mount/exec_mount_unsupported.go
Expand Up @@ -83,8 +83,8 @@ func (mounter *execMounter) MakeFile(pathname string) error {
return nil
}

func (mounter *execMounter) ExistsPath(pathname string) bool {
return true
func (mounter *execMounter) ExistsPath(pathname string) (bool, error) {
return true, errors.New("not implemented")
}

func (mounter *execMounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) {
Expand All @@ -110,3 +110,7 @@ func (mounter *execMounter) GetFSGroup(pathname string) (int64, error) {
// GetSELinuxSupport is not implemented in this variant of execMounter; it
// always returns false together with a "not implemented" error.
func (mounter *execMounter) GetSELinuxSupport(pathname string) (bool, error) {
return false, errors.New("not implemented")
}

// GetMode is not implemented in this variant of execMounter; it always
// returns a zero mode together with a "not implemented" error.
func (mounter *execMounter) GetMode(pathname string) (os.FileMode, error) {
return 0, errors.New("not implemented")
}
8 changes: 6 additions & 2 deletions pkg/util/mount/fake.go
Expand Up @@ -201,8 +201,8 @@ func (f *FakeMounter) MakeFile(pathname string) error {
return nil
}

func (f *FakeMounter) ExistsPath(pathname string) bool {
return false
func (f *FakeMounter) ExistsPath(pathname string) (bool, error) {
return false, errors.New("not implemented")
}

func (f *FakeMounter) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) {
Expand Down Expand Up @@ -232,3 +232,7 @@ func (f *FakeMounter) GetFSGroup(pathname string) (int64, error) {
// GetSELinuxSupport is a stub on FakeMounter: it always returns false
// together with a "GetSELinuxSupport not implemented" error.
func (f *FakeMounter) GetSELinuxSupport(pathname string) (bool, error) {
return false, errors.New("GetSELinuxSupport not implemented")
}

// GetMode is a stub on FakeMounter: it always returns a zero mode together
// with a "not implemented" error.
func (f *FakeMounter) GetMode(pathname string) (os.FileMode, error) {
return 0, errors.New("not implemented")
}
24 changes: 14 additions & 10 deletions pkg/util/mount/mount.go
Expand Up @@ -84,16 +84,18 @@ type Interface interface {
// MakeDir creates a new directory.
// Will operate in the host mount namespace if kubelet is running in a container
MakeDir(pathname string) error
// SafeMakeDir makes sure that the created directory does not escape given
// base directory mis-using symlinks. The directory is created in the same
// mount namespace as where kubelet is running. Note that the function makes
// sure that it creates the directory somewhere under the base, nothing
// else. E.g. if the directory already exists, it may exists outside of the
// base due to symlinks.
SafeMakeDir(pathname string, base string, perm os.FileMode) error
// ExistsPath checks whether the path exists.
// Will operate in the host mount namespace if kubelet is running in a container
ExistsPath(pathname string) bool
// SafeMakeDir creates subdir within given base. It makes sure that the
// created directory does not escape given base directory mis-using
// symlinks. Note that the function makes sure that it creates the directory
// somewhere under the base, nothing else. E.g. if the directory already
// exists, it may exist outside of the base due to symlinks.
// This method should be used if the directory to create is inside volume
// that's under user control. User must not be able to use symlinks to
// escape the volume to create directories somewhere else.
SafeMakeDir(subdir string, base string, perm os.FileMode) error
// ExistsPath checks whether the path exists.
// Will operate in the host mount namespace if kubelet is running in a container.
// Error is returned on any other error than "file not found".
ExistsPath(pathname string) (bool, error)
// CleanSubPaths removes any bind-mounts created by PrepareSafeSubpath in given
// pod volume directory.
CleanSubPaths(podDir string, volumeName string) error
Expand All @@ -117,6 +119,8 @@ type Interface interface {
// GetSELinuxSupport returns true if given path is on a mount that supports
// SELinux.
GetSELinuxSupport(pathname string) (bool, error)
// GetMode returns permissions of the path.
GetMode(pathname string) (os.FileMode, error)
}

type Subpath struct {
Expand Down

0 comments on commit d2495b8

Please sign in to comment.