Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make /var/lib/kubelet as shared during startup #45724

Merged
merged 1 commit into from Sep 2, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions pkg/kubelet/cm/container_manager_linux_test.go
Expand Up @@ -75,6 +75,10 @@ func (mi *fakeMountInterface) PathIsDevice(pathname string) (bool, error) {
return true, nil
}

func (mi *fakeMountInterface) MakeRShared(path string) error {
return nil
}

func fakeContainerMgrMountInt() mount.Interface {
return &fakeMountInterface{
[]mount.MountPoint{
Expand Down
3 changes: 3 additions & 0 deletions pkg/kubelet/kubelet.go
Expand Up @@ -1157,6 +1157,9 @@ func (kl *Kubelet) setupDataDirs() error {
if err := os.MkdirAll(kl.getRootDir(), 0750); err != nil {
return fmt.Errorf("error creating root directory: %v", err)
}
if err := kl.mounter.MakeRShared(kl.getRootDir()); err != nil {
return fmt.Errorf("error configuring root directory: %v", err)
}
if err := os.MkdirAll(kl.getPodsDir(), 0750); err != nil {
return fmt.Errorf("error creating pods directory: %v", err)
}
Expand Down
1 change: 1 addition & 0 deletions pkg/kubelet/kubelet_test.go
Expand Up @@ -160,6 +160,7 @@ func newTestKubeletWithImageList(
kubelet.recorder = fakeRecorder
kubelet.kubeClient = fakeKubeClient
kubelet.os = &containertest.FakeOS{}
kubelet.mounter = &mount.FakeMounter{}

kubelet.hostname = testKubeletHostname
kubelet.nodeName = types.NodeName(testKubeletHostname)
Expand Down
2 changes: 2 additions & 0 deletions pkg/kubelet/runonce_test.go
Expand Up @@ -46,6 +46,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/status"
statustest "k8s.io/kubernetes/pkg/kubelet/status/testing"
"k8s.io/kubernetes/pkg/kubelet/volumemanager"
"k8s.io/kubernetes/pkg/util/mount"
"k8s.io/kubernetes/pkg/volume"
volumetest "k8s.io/kubernetes/pkg/volume/testing"
)
Expand Down Expand Up @@ -127,6 +128,7 @@ func TestRunOnce(t *testing.T) {

kb.evictionManager = evictionManager
kb.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)
kb.mounter = &mount.FakeMounter{}
if err := kb.setupDataDirs(); err != nil {
t.Errorf("Failed to init data dirs: %v", err)
}
Expand Down
5 changes: 4 additions & 1 deletion pkg/util/io/BUILD
Expand Up @@ -7,7 +7,10 @@ load(

go_library(
name = "go_default_library",
srcs = ["writer.go"],
srcs = [
"consistentread.go",
"writer.go",
],
deps = ["//vendor/github.com/golang/glog:go_default_library"],
)

Expand Down
45 changes: 45 additions & 0 deletions pkg/util/io/consistentread.go
@@ -0,0 +1,45 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package io

import (
"bytes"
"fmt"
"io/ioutil"
)

// ConsistentRead repeatedly reads a file until it gets the same content twice.
// This is useful when reading files in /proc that are larger than page size
// and kernel may modify them between individual read() syscalls.
func ConsistentRead(filename string, attempts int) ([]byte, error) {
oldContent, err := ioutil.ReadFile(filename)
if err != nil {
return nil, err
}
for i := 0; i < attempts; i++ {
newContent, err := ioutil.ReadFile(filename)
if err != nil {
return nil, err
}
if bytes.Compare(oldContent, newContent) == 0 {
return newContent, nil
}
// Files are different, continue reading
oldContent = newContent
}
return nil, fmt.Errorf("could not get consistent content of %s after %d attempts", filename, attempts)
}
1 change: 1 addition & 0 deletions pkg/util/mount/BUILD
Expand Up @@ -27,6 +27,7 @@ go_library(
"//vendor/k8s.io/utils/exec:go_default_library",
] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"//pkg/util/io:go_default_library",
"//vendor/golang.org/x/sys/unix:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
],
Expand Down
4 changes: 4 additions & 0 deletions pkg/util/mount/fake.go
Expand Up @@ -171,3 +171,7 @@ func (f *FakeMounter) PathIsDevice(pathname string) (bool, error) {
func (f *FakeMounter) GetDeviceNameFromMount(mountPath, pluginDir string) (string, error) {
return getDeviceNameFromMount(f, mountPath, pluginDir)
}

func (f *FakeMounter) MakeRShared(path string) error {
return nil
}
3 changes: 3 additions & 0 deletions pkg/util/mount/mount.go
Expand Up @@ -67,6 +67,9 @@ type Interface interface {
// GetDeviceNameFromMount finds the device name by checking the mount path
// to get the global mount path which matches its plugin directory
GetDeviceNameFromMount(mountPath, pluginDir string) (string, error)
// MakeRShared checks that given path is on a mount with 'rshared' mount
// propagation. If not, it bind-mounts the path as rshared.
MakeRShared(path string) error
}

// Exec executes command where mount utilities are. This can be either the host,
Expand Down
190 changes: 131 additions & 59 deletions pkg/util/mount/mount_linux.go
Expand Up @@ -19,10 +19,7 @@ limitations under the License.
package mount

import (
"bufio"
"fmt"
"hash/fnv"
"io"
"os"
"os/exec"
"strconv"
Expand All @@ -32,6 +29,7 @@ import (
"github.com/golang/glog"
"golang.org/x/sys/unix"
"k8s.io/apimachinery/pkg/util/sets"
utilio "k8s.io/kubernetes/pkg/util/io"
utilexec "k8s.io/utils/exec"
)

Expand All @@ -42,6 +40,8 @@ const (
expectedNumFieldsPerLine = 6
// Location of the mount file to use
procMountsPath = "/proc/mounts"
// Location of the mountinfo file
procMountInfoPath = "/proc/self/mountinfo"
)

const (
Expand Down Expand Up @@ -333,76 +333,54 @@ func (mounter *Mounter) GetDeviceNameFromMount(mountPath, pluginDir string) (str
}

func listProcMounts(mountFilePath string) ([]MountPoint, error) {
hash1, err := readProcMounts(mountFilePath, nil)
content, err := utilio.ConsistentRead(mountFilePath, maxListTries)
if err != nil {
return nil, err
}

for i := 0; i < maxListTries; i++ {
mps := []MountPoint{}
hash2, err := readProcMounts(mountFilePath, &mps)
if err != nil {
return nil, err
}
if hash1 == hash2 {
// Success
return mps, nil
}
hash1 = hash2
}
return nil, fmt.Errorf("failed to get a consistent snapshot of %v after %d tries", mountFilePath, maxListTries)
}

// readProcMounts reads the given mountFilePath (normally /proc/mounts) and produces a hash
// of the contents. If the out argument is not nil, this fills it with MountPoint structs.
func readProcMounts(mountFilePath string, out *[]MountPoint) (uint32, error) {
file, err := os.Open(mountFilePath)
if err != nil {
return 0, err
}
defer file.Close()
return readProcMountsFrom(file, out)
return parseProcMounts(content)
}

func readProcMountsFrom(file io.Reader, out *[]MountPoint) (uint32, error) {
hash := fnv.New32a()
scanner := bufio.NewReader(file)
for {
line, err := scanner.ReadString('\n')
if err == io.EOF {
break
func parseProcMounts(content []byte) ([]MountPoint, error) {
out := []MountPoint{}
lines := strings.Split(string(content), "\n")
for _, line := range lines {
if line == "" {
// the last split() item is empty string following the last \n
continue
}
fields := strings.Fields(line)
if len(fields) != expectedNumFieldsPerLine {
return 0, fmt.Errorf("wrong number of fields (expected %d, got %d): %s", expectedNumFieldsPerLine, len(fields), line)
return nil, fmt.Errorf("wrong number of fields (expected %d, got %d): %s", expectedNumFieldsPerLine, len(fields), line)
}

fmt.Fprintf(hash, "%s", line)

if out != nil {
mp := MountPoint{
Device: fields[0],
Path: fields[1],
Type: fields[2],
Opts: strings.Split(fields[3], ","),
}

freq, err := strconv.Atoi(fields[4])
if err != nil {
return 0, err
}
mp.Freq = freq
mp := MountPoint{
Device: fields[0],
Path: fields[1],
Type: fields[2],
Opts: strings.Split(fields[3], ","),
}

pass, err := strconv.Atoi(fields[5])
if err != nil {
return 0, err
}
mp.Pass = pass
freq, err := strconv.Atoi(fields[4])
if err != nil {
return nil, err
}
mp.Freq = freq

*out = append(*out, mp)
pass, err := strconv.Atoi(fields[5])
if err != nil {
return nil, err
}
mp.Pass = pass

out = append(out, mp)
}
return hash.Sum32(), nil
return out, nil
}

func (mounter *Mounter) MakeRShared(path string) error {
mountCmd := defaultMountCommand
mountArgs := []string{}
return doMakeRShared(path, procMountInfoPath, mountCmd, mountArgs)
}

// formatAndMount uses unix utils to format and mount the given disk
Expand Down Expand Up @@ -502,3 +480,97 @@ func (mounter *SafeFormatAndMount) getDiskFormat(disk string) (string, error) {
// and MD RAID are reported as FSTYPE and caught above).
return "unknown data, probably partitions", nil
}

// isShared returns true, if given path is on a mount point that has shared
// mount propagation.
func isShared(path string, filename string) (bool, error) {
infos, err := parseMountInfo(filename)
if err != nil {
return false, err
}

// process /proc/xxx/mountinfo in backward order and find the first mount
// point that is prefix of 'path' - that's the mount where path resides
var info *mountInfo
for i := len(infos) - 1; i >= 0; i-- {
if strings.HasPrefix(path, infos[i].mountPoint) {
info = &infos[i]
break
}
}
if info == nil {
return false, fmt.Errorf("cannot find mount point for %q", path)
}

// parse optional parameters
for _, opt := range info.optional {
if strings.HasPrefix(opt, "shared:") {
return true, nil
}
}
return false, nil
}

type mountInfo struct {
mountPoint string
// list of "optional parameters", mount propagation is one of them
optional []string
}

// parseMountInfo parses /proc/xxx/mountinfo.
func parseMountInfo(filename string) ([]mountInfo, error) {
content, err := utilio.ConsistentRead(filename, maxListTries)
if err != nil {
return []mountInfo{}, err
}
contentStr := string(content)
infos := []mountInfo{}

for _, line := range strings.Split(contentStr, "\n") {
if line == "" {
// the last split() item is empty string following the last \n
continue
}
fields := strings.Fields(line)
if len(fields) < 7 {
return nil, fmt.Errorf("wrong number of fields in (expected %d, got %d): %s", 8, len(fields), line)
}
info := mountInfo{
mountPoint: fields[4],
optional: []string{},
}
for i := 6; i < len(fields) && fields[i] != "-"; i++ {
info.optional = append(info.optional, fields[i])
}
infos = append(infos, info)
}
return infos, nil
}

// doMakeRShared is common implementation of MakeRShared on Linux. It checks if
// path is shared and bind-mounts it as rshared if needed. mountCmd and
// mountArgs are expected to contain mount-like command, doMakeRShared will add
// '--bind <path> <path>' and '--make-rshared <path>' to mountArgs.
func doMakeRShared(path string, mountInfoFilename string, mountCmd string, mountArgs []string) error {
shared, err := isShared(path, mountInfoFilename)
if err != nil {
return err
}
if shared {
glog.V(4).Infof("Directory %s is already on a shared mount", path)
return nil
}

glog.V(2).Infof("Bind-mounting %q with shared mount propagation", path)
// mount --bind /var/lib/kubelet /var/lib/kubelet
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use the syscall package? Why exec?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

huh? syscall.Exec will replace /usr/bin/kulebet in current process with /bin/mount and it won't ever return. We need fork()+exec() and that's exactly what os.Exec does.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Reworked to use syscall.Mount.

if err := syscall.Mount(path, path, "" /*fstype*/, syscall.MS_BIND, "" /*data*/); err != nil {
return fmt.Errorf("failed to bind-mount %s: %v", path, err)
}

// mount --make-rshared /var/lib/kubelet
if err := syscall.Mount(path, path, "" /*fstype*/, syscall.MS_SHARED|syscall.MS_REC, "" /*data*/); err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is a remount flag (MS_REMOUNT) required here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, remount is not required. mount --make-rshared uses only the fags above as implied by the man page and checked with strace.

return fmt.Errorf("failed to make %s rshared: %v", path, err)
}

return nil
}