From 3925810432e403a5947c714cba2301165791be7e Mon Sep 17 00:00:00 2001 From: Loic Blot Date: Wed, 15 Feb 2023 20:30:19 +0100 Subject: [PATCH] feat(porting/freebsd): make kubelet start & interact with CRI on FreeBSD This is a vanilla porting making basic parts of the kubelet interact with FreeBSD and containerd 1.7.0b4 (for now) on FreeBSD --- pkg/kubelet/cadvisor/cadvisor_freebsd.go | 92 +++++ pkg/kubelet/cadvisor/cadvisor_unsupported.go | 4 +- pkg/kubelet/cm/container_manager_freebsd.go | 266 ++++++++++++ .../cm/container_manager_unsupported.go | 4 +- pkg/kubelet/freebsdstats/freebsdstats.go | 227 +++++++++++ .../freebsdstats/perfcounter_nodestats.go | 27 ++ pkg/util/rlimit/rlimit_freebsd.go | 35 ++ pkg/util/rlimit/rlimit_unsupported.go | 4 +- pkg/volume/util/hostutil/hostutil_freebsd.go | 229 +++++++++++ .../util/hostutil/hostutil_unsupported.go | 4 +- pkg/volume/util/subpath/subpath_freebsd.go | 83 ++++ pkg/volume/util/subpath/subpath_linux.go | 350 +--------------- pkg/volume/util/subpath/subpath_unix.go | 384 ++++++++++++++++++ .../util/subpath/subpath_unsupported.go | 4 +- .../mount-utils/mount_helper_freebsd.go | 56 +++ .../k8s.io/mount-utils/mount_helper_linux.go | 151 +++++++ ...nix_test.go => mount_helper_linux_test.go} | 4 +- .../k8s.io/mount-utils/mount_helper_unix.go | 127 ------ 18 files changed, 1567 insertions(+), 484 deletions(-) create mode 100644 pkg/kubelet/cadvisor/cadvisor_freebsd.go create mode 100644 pkg/kubelet/cm/container_manager_freebsd.go create mode 100644 pkg/kubelet/freebsdstats/freebsdstats.go create mode 100644 pkg/kubelet/freebsdstats/perfcounter_nodestats.go create mode 100644 pkg/util/rlimit/rlimit_freebsd.go create mode 100644 pkg/volume/util/hostutil/hostutil_freebsd.go create mode 100644 pkg/volume/util/subpath/subpath_freebsd.go create mode 100644 pkg/volume/util/subpath/subpath_unix.go create mode 100644 staging/src/k8s.io/mount-utils/mount_helper_freebsd.go create mode 100644 
staging/src/k8s.io/mount-utils/mount_helper_linux.go rename staging/src/k8s.io/mount-utils/{mount_helper_unix_test.go => mount_helper_linux_test.go} (99%) diff --git a/pkg/kubelet/cadvisor/cadvisor_freebsd.go b/pkg/kubelet/cadvisor/cadvisor_freebsd.go new file mode 100644 index 000000000000..35c2b16f436e --- /dev/null +++ b/pkg/kubelet/cadvisor/cadvisor_freebsd.go @@ -0,0 +1,92 @@ +//go:build freebsd +// +build freebsd + +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cadvisor + +import ( + "github.com/google/cadvisor/events" + cadvisorapi "github.com/google/cadvisor/info/v1" + cadvisorapiv2 "github.com/google/cadvisor/info/v2" + "k8s.io/kubernetes/pkg/kubelet/freebsdstats" +) + +type cadvisorClient struct { + rootPath string + bsdStatsClient freebsdstats.Client +} + +var _ Interface = new(cadvisorClient) + +// New creates a cAdvisor and exports its API on the specified port if port > 0. 
+func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots []string, usingLegacyStats, localStorageCapacityIsolation bool) (Interface, error) { + client, err := freebsdstats.NewPerfCounterClient() + return &cadvisorClient{ + rootPath: rootPath, + bsdStatsClient: client, + }, err +} + +func (cu *cadvisorClient) Start() error { + return nil +} + +func (cu *cadvisorClient) DockerContainer(name string, req *cadvisorapi.ContainerInfoRequest) (cadvisorapi.ContainerInfo, error) { + return cadvisorapi.ContainerInfo{}, nil +} + +func (cu *cadvisorClient) ContainerInfo(name string, req *cadvisorapi.ContainerInfoRequest) (*cadvisorapi.ContainerInfo, error) { + return &cadvisorapi.ContainerInfo{}, nil +} + +// ContainerInfoV2 is only expected to be used for the root container. Returns info for all containers in the node. +func (cu *cadvisorClient) ContainerInfoV2(name string, options cadvisorapiv2.RequestOptions) (map[string]cadvisorapiv2.ContainerInfo, error) { + return cu.bsdStatsClient.ContainerInfos() +} + +func (cu *cadvisorClient) GetRequestedContainersInfo(containerName string, options cadvisorapiv2.RequestOptions) (map[string]*cadvisorapi.ContainerInfo, error) { + return nil, nil +} + +func (cu *cadvisorClient) SubcontainerInfo(name string, req *cadvisorapi.ContainerInfoRequest) (map[string]*cadvisorapi.ContainerInfo, error) { + return nil, nil +} + +func (cu *cadvisorClient) MachineInfo() (*cadvisorapi.MachineInfo, error) { + return cu.bsdStatsClient.MachineInfo() +} + +func (cu *cadvisorClient) VersionInfo() (*cadvisorapi.VersionInfo, error) { + return cu.bsdStatsClient.VersionInfo() +} + +func (cu *cadvisorClient) ImagesFsInfo() (cadvisorapiv2.FsInfo, error) { + return cadvisorapiv2.FsInfo{}, nil +} + +func (cu *cadvisorClient) RootFsInfo() (cadvisorapiv2.FsInfo, error) { + return cu.GetDirFsInfo(cu.rootPath) +} + +func (cu *cadvisorClient) WatchEvents(request *events.Request) (*events.EventChannel, error) { + return &events.EventChannel{}, nil 
+} + +func (cu *cadvisorClient) GetDirFsInfo(path string) (cadvisorapiv2.FsInfo, error) { + return cu.bsdStatsClient.GetDirFsInfo(path) +} \ No newline at end of file diff --git a/pkg/kubelet/cadvisor/cadvisor_unsupported.go b/pkg/kubelet/cadvisor/cadvisor_unsupported.go index 40113a9ce821..4d306c8b9f6c 100644 --- a/pkg/kubelet/cadvisor/cadvisor_unsupported.go +++ b/pkg/kubelet/cadvisor/cadvisor_unsupported.go @@ -1,5 +1,5 @@ -//go:build !linux && !windows -// +build !linux,!windows +//go:build !linux && !windows && !freebsd +// +build !linux,!windows,!freebsd /* Copyright 2015 The Kubernetes Authors. diff --git a/pkg/kubelet/cm/container_manager_freebsd.go b/pkg/kubelet/cm/container_manager_freebsd.go new file mode 100644 index 000000000000..07df71a42cd1 --- /dev/null +++ b/pkg/kubelet/cm/container_manager_freebsd.go @@ -0,0 +1,266 @@ +//go:build freebsd +// +build freebsd + +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// containerManagerImpl implements container manager on FreeBSD. 
+ +package cm + +import ( + "fmt" + + "k8s.io/klog/v2" + "k8s.io/mount-utils" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/types" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/record" + internalapi "k8s.io/cri-api/pkg/apis" + podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1" + "k8s.io/kubernetes/pkg/kubelet/cadvisor" + "k8s.io/kubernetes/pkg/kubelet/cm/admission" + "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager" + "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager" + "k8s.io/kubernetes/pkg/kubelet/cm/dra" + "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager" + "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + "k8s.io/kubernetes/pkg/kubelet/config" + kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" + "k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache" + "k8s.io/kubernetes/pkg/kubelet/status" + schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" +) + +type containerManagerImpl struct { + // Capacity of this node. + capacity v1.ResourceList + // Interface for cadvisor. + cadvisorInterface cadvisor.Interface + // Config of this node. + nodeConfig NodeConfig + // Interface for exporting and allocating devices reported by device plugins. 
+ deviceManager devicemanager.Manager + // Interface for Topology resource co-ordination + topologyManager topologymanager.Manager +} + +type noopWindowsResourceAllocator struct{} + +func (ra *noopWindowsResourceAllocator) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult { + return admission.GetPodAdmitResult(nil) +} + +func (cm *containerManagerImpl) Start(node *v1.Node, + activePods ActivePodsFunc, + sourcesReady config.SourcesReady, + podStatusProvider status.PodStatusProvider, + runtimeService internalapi.RuntimeService, + localStorageCapacityIsolation bool) error { + klog.V(2).InfoS("Starting FreeBSD container manager") + + if localStorageCapacityIsolation { + rootfs, err := cm.cadvisorInterface.RootFsInfo() + if err != nil { + return fmt.Errorf("failed to get rootfs info: %v", err) + } + for rName, rCap := range cadvisor.EphemeralStorageCapacityFromFsInfo(rootfs) { + cm.capacity[rName] = rCap + } + } + + // Start the device manager; its error, if any, is returned unchanged. + if err := cm.deviceManager.Start(devicemanager.ActivePodsFunc(activePods), sourcesReady); err != nil { + return err + } + + return nil +} + +// NewContainerManager creates the FreeBSD container manager. +func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig, failSwapOn bool, recorder record.EventRecorder, kubeClient clientset.Interface) (ContainerManager, error) { + // It is safe to invoke `MachineInfo` on cAdvisor before logically initializing cAdvisor here because + // machine info is computed and cached once as part of cAdvisor object creation. 
+ // But `RootFsInfo` and `ImagesFsInfo` are not available at this moment so they will be called later during manager starts + machineInfo, err := cadvisorInterface.MachineInfo() + if err != nil { + return nil, err + } + capacity := cadvisor.CapacityFromMachineInfo(machineInfo) + + cm := &containerManagerImpl{ + capacity: capacity, + nodeConfig: nodeConfig, + cadvisorInterface: cadvisorInterface, + } + + cm.topologyManager = topologymanager.NewFakeManager() + + klog.InfoS("Creating device plugin manager") + cm.deviceManager, err = devicemanager.NewManagerImpl(nil, cm.topologyManager) + if err != nil { + return nil, err + } + cm.topologyManager.AddHintProvider(cm.deviceManager) + + return cm, nil +} + +func (cm *containerManagerImpl) SystemCgroupsLimit() v1.ResourceList { + return v1.ResourceList{} +} + +func (cm *containerManagerImpl) GetNodeConfig() NodeConfig { + return NodeConfig{} +} + +func (cm *containerManagerImpl) GetMountedSubsystems() *CgroupSubsystems { + return &CgroupSubsystems{} +} + +func (cm *containerManagerImpl) GetQOSContainersInfo() QOSContainersInfo { + return QOSContainersInfo{} +} + +func (cm *containerManagerImpl) UpdateQOSCgroups() error { + return nil +} + +func (cm *containerManagerImpl) Status() Status { + return Status{} +} + +func (cm *containerManagerImpl) GetNodeAllocatableReservation() v1.ResourceList { + evictionReservation := hardEvictionReservation(cm.nodeConfig.HardEvictionThresholds, cm.capacity) + result := make(v1.ResourceList) + for k := range cm.capacity { + value := resource.NewQuantity(0, resource.DecimalSI) + if cm.nodeConfig.SystemReserved != nil { + value.Add(cm.nodeConfig.SystemReserved[k]) + } + if cm.nodeConfig.KubeReserved != nil { + value.Add(cm.nodeConfig.KubeReserved[k]) + } + if evictionReservation != nil { + value.Add(evictionReservation[k]) + } + if !value.IsZero() { + result[k] = *value + } + } + return result +} + +func (cm *containerManagerImpl) GetCapacity(localStorageCapacityIsolation bool) 
v1.ResourceList { + return cm.capacity +} + +func (cm *containerManagerImpl) GetPluginRegistrationHandler() cache.PluginHandler { + return cm.deviceManager.GetWatcherHandler() +} + +func (cm *containerManagerImpl) GetDevicePluginResourceCapacity() (v1.ResourceList, v1.ResourceList, []string) { + return cm.deviceManager.GetCapacity() +} + +func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager { + return &podContainerManagerStub{} +} + +func (cm *containerManagerImpl) GetResources(pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error) { + opts := &kubecontainer.RunContainerOptions{} + // Allocate should already be called during predicateAdmitHandler.Admit(), + // just try to fetch device runtime information from cached state here + devOpts, err := cm.deviceManager.GetDeviceRunContainerOptions(pod, container) + if err != nil { + return nil, err + } else if devOpts == nil { + return opts, nil + } + opts.Devices = append(opts.Devices, devOpts.Devices...) + opts.Mounts = append(opts.Mounts, devOpts.Mounts...) + opts.Envs = append(opts.Envs, devOpts.Envs...) + opts.Annotations = append(opts.Annotations, devOpts.Annotations...) 
+ return opts, nil +} + +func (cm *containerManagerImpl) UpdatePluginResources(node *schedulerframework.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error { + return cm.deviceManager.UpdatePluginResources(node, attrs) +} + +func (cm *containerManagerImpl) InternalContainerLifecycle() InternalContainerLifecycle { + return &internalContainerLifecycleImpl{cpumanager.NewFakeManager(), memorymanager.NewFakeManager(), topologymanager.NewFakeManager()} +} + +func (cm *containerManagerImpl) GetPodCgroupRoot() string { + return "" +} + +func (cm *containerManagerImpl) GetDevices(podUID, containerName string) []*podresourcesapi.ContainerDevices { + return containerDevicesFromResourceDeviceInstances(cm.deviceManager.GetDevices(podUID, containerName)) +} + +func (cm *containerManagerImpl) GetAllocatableDevices() []*podresourcesapi.ContainerDevices { + return nil +} + +func (cm *containerManagerImpl) ShouldResetExtendedResourceCapacity() bool { + return cm.deviceManager.ShouldResetExtendedResourceCapacity() +} + +func (cm *containerManagerImpl) GetAllocateResourcesPodAdmitHandler() lifecycle.PodAdmitHandler { + return &noopWindowsResourceAllocator{} +} + +func (cm *containerManagerImpl) UpdateAllocatedDevices() { + return +} + +func (cm *containerManagerImpl) GetCPUs(_, _ string) []int64 { + return nil +} + +func (cm *containerManagerImpl) GetAllocatableCPUs() []int64 { + return nil +} + +func (cm *containerManagerImpl) GetMemory(_, _ string) []*podresourcesapi.ContainerMemory { + return nil +} + +func (cm *containerManagerImpl) GetAllocatableMemory() []*podresourcesapi.ContainerMemory { + return nil +} + +func (cm *containerManagerImpl) GetNodeAllocatableAbsolute() v1.ResourceList { + return nil +} + +func (cm *containerManagerImpl) PrepareDynamicResources(pod *v1.Pod) error { + return nil +} + +func (cm *containerManagerImpl) UnprepareDynamicResources(pod *v1.Pod) error { + return nil +} + +func (cm *containerManagerImpl) PodMightNeedToUnprepareResources(UID types.UID) bool 
{ + return false +} diff --git a/pkg/kubelet/cm/container_manager_unsupported.go b/pkg/kubelet/cm/container_manager_unsupported.go index ba4f9a1b6ad4..d4af0d533d90 100644 --- a/pkg/kubelet/cm/container_manager_unsupported.go +++ b/pkg/kubelet/cm/container_manager_unsupported.go @@ -1,5 +1,5 @@ -//go:build !linux && !windows -// +build !linux,!windows +//go:build !linux && !windows && !freebsd +// +build !linux,!windows,!freebsd /* Copyright 2015 The Kubernetes Authors. diff --git a/pkg/kubelet/freebsdstats/freebsdstats.go b/pkg/kubelet/freebsdstats/freebsdstats.go new file mode 100644 index 000000000000..03c2abd63b15 --- /dev/null +++ b/pkg/kubelet/freebsdstats/freebsdstats.go @@ -0,0 +1,227 @@ +//go:build freebsd +// +build freebsd + +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package freebsdtats provides a client to get node and pod level stats on freebsd +package freebsdstats + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "strings" + + "time" + "unsafe" + + cadvisorapi "github.com/google/cadvisor/info/v1" + cadvisorapiv2 "github.com/google/cadvisor/info/v2" + "golang.org/x/sys/unix" +) + +/* +#include +#include +#include +#include +*/ +import "C" + +// Client is an interface that is used to get stats information. 
+type Client interface { + ContainerInfos() (map[string]cadvisorapiv2.ContainerInfo, error) + MachineInfo() (*cadvisorapi.MachineInfo, error) + VersionInfo() (*cadvisorapi.VersionInfo, error) + GetDirFsInfo(path string) (cadvisorapiv2.FsInfo, error) +} + +// StatsClient is a client that implements the Client interface +type StatsClient struct { +} + +// newClient constructs a Client. +func newClient() (Client, error) { + statsClient := new(StatsClient) + + // err := statsClient.client.startMonitoring() + // if err != nil { + // return nil, err + // } + + return statsClient, nil +} + +func processorCount() (int, error) { + n, err := unix.SysctlUint32("hw.ncpu") + return int(n), err +} + +func memorySize() (uint64, error) { + return unix.SysctlUint64("hw.physmem") +} + +func getSystemUUID() (string, error) { + return unix.Sysctl("kern.hostuuid") +} + +func getOsRelease() (string, error) { + return unix.Sysctl("kern.osrelease") +} + +func getKernVersion() (string, error) { + return unix.Sysctl("kern.version") +} + +// ContainerInfos returns a map of container infos. The map contains node and +// pod level stats. Analogous to cadvisor GetContainerInfoV2 method. +func (c *StatsClient) ContainerInfos() (map[string]cadvisorapiv2.ContainerInfo, error) { + infos := make(map[string]cadvisorapiv2.ContainerInfo) + // rootContainerInfo, err := c.createRootContainerInfo() + // if err != nil { + // return nil, err + // } + + // infos["/"] = *rootContainerInfo + + return infos, nil +} + +// MachineInfo returns a cadvisorapi.MachineInfo with details about the +// node machine. Analogous to cadvisor MachineInfo method. 
+func (c *StatsClient) MachineInfo() (*cadvisorapi.MachineInfo, error) { + hostname, err := os.Hostname() + if err != nil { + return nil, err + } + + systemUUID, err := getSystemUUID() + if err != nil { + return nil, err + } + + // This is not implemented on FreeBSD + // bootId, err := getBootID() + // if err != nil { + // return nil, err + // } + + numCores, err := processorCount() + if err != nil { + return nil, err + } + + memSize, err := memorySize() + if err != nil { + return nil, err + } + + return &cadvisorapi.MachineInfo{ + NumCores: numCores, + MemoryCapacity: memSize, + MachineID: hostname, + SystemUUID: systemUUID, + // BootID: bootId, + }, nil +} + +// VersionInfo returns a cadvisorapi.VersionInfo with version info of +// the kernel and the OS release. Analogous to cadvisor VersionInfo method. +func (c *StatsClient) VersionInfo() (*cadvisorapi.VersionInfo, error) { + kver, err := getKernVersion() + if err != nil { + return nil, err + } + + osver, err := getOsRelease() + if err != nil { + return nil, err + } + + return &cadvisorapi.VersionInfo{ + KernelVersion: kver, + ContainerOsVersion: osver, + }, nil +} + +func getMountPoints() ([]cadvisorapiv2.FsInfo, error) { + var mntbuf *C.struct_statfs + count := C.getmntinfo(&mntbuf, C.MNT_NOWAIT) + if count == 0 { + return nil, errors.New("failed to run FreeBSD getmntinfo() syscall") + } + + mnt := unsafe.Slice(mntbuf, int(count)) // Go slice view over the count entries returned by getmntinfo + infos := make([]cadvisorapiv2.FsInfo, 0, count) // length 0: one entry per mount is appended below + for i := 0; i < int(count); i++ { + inodes := uint64(mnt[i].f_files) + inodesFree := uint64(mnt[i].f_ffree) + infos = append(infos, cadvisorapiv2.FsInfo{ + Timestamp: time.Now(), + Device: C.GoString(&mnt[i].f_mntfromname[0]), + Mountpoint: C.GoString(&mnt[i].f_mntonname[0]), + Inodes: &inodes, + InodesFree: &inodesFree, + Capacity: uint64(mnt[i].f_blocks) * uint64(mnt[i].f_bsize), + Available: uint64(mnt[i].f_bavail) * uint64(mnt[i].f_bsize), + Usage: (uint64(mnt[i].f_blocks) - uint64(mnt[i].f_bavail)) * 
uint64(mnt[i].f_bsize), + }) + } + return infos, nil +} + +func getMountpoint(mountPath string, mountpoints []cadvisorapiv2.FsInfo) *cadvisorapiv2.FsInfo { + for _, mp := range mountpoints { + if mp.Mountpoint == mountPath { + return &mp + } + } + return nil +} + +// GetDirFsInfo returns filesystem capacity and usage information. +func (c *StatsClient) GetDirFsInfo(path string) (cadvisorapiv2.FsInfo, error) { + // var freeBytesAvailable, totalNumberOfBytes, totalNumberOfFreeBytes int64 + var err error + + mountpoints, err := getMountPoints() + if err != nil { + return cadvisorapiv2.FsInfo{}, err + } + + dir := path + for { + pathdir, _ := filepath.Split(dir) + // stop at the root, or when a relative/empty path has no parent left (would loop forever otherwise) + if pathdir == "/" || pathdir == "" { + if mp := getMountpoint(pathdir, mountpoints); mp != nil { + return *mp, nil + } + + return cadvisorapiv2.FsInfo{}, fmt.Errorf("unable to find mountpoint for path %s", path) + } + // trim "/" from the new parent path otherwise the next possible + // filepath.Split in the loop will not split the string any further + dir = strings.TrimSuffix(pathdir, "/") + if mp := getMountpoint(dir, mountpoints); mp != nil { + return *mp, nil + } + } + + return cadvisorapiv2.FsInfo{}, err +} diff --git a/pkg/kubelet/freebsdstats/perfcounter_nodestats.go b/pkg/kubelet/freebsdstats/perfcounter_nodestats.go new file mode 100644 index 000000000000..69c11ab84089 --- /dev/null +++ b/pkg/kubelet/freebsdstats/perfcounter_nodestats.go @@ -0,0 +1,27 @@ +//go:build freebsd +// +build freebsd + +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package freebsdstats provides a client to get node and pod level stats on freebsd +package freebsdstats + +// NewPerfCounterClient creates a client using perf counters +func NewPerfCounterClient() (Client, error) { + // Initialize the cache + return newClient() +} \ No newline at end of file diff --git a/pkg/util/rlimit/rlimit_freebsd.go b/pkg/util/rlimit/rlimit_freebsd.go new file mode 100644 index 000000000000..3ba7c43c32fc --- /dev/null +++ b/pkg/util/rlimit/rlimit_freebsd.go @@ -0,0 +1,35 @@ +//go:build freebsd +// +build freebsd + +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package rlimit + +import ( + "fmt" + "golang.org/x/sys/unix" + "math" +) + +// SetNumFiles sets the FreeBSD rlimit (soft and hard RLIMIT_NOFILE) for the maximum open files. 
+func SetNumFiles(maxOpenFiles uint64) error { + if maxOpenFiles > math.MaxInt64 { + return fmt.Errorf("cannot set RLIMIT_NOFILE to a value upper than INT64_MAX") + } + return unix.Setrlimit(unix.RLIMIT_NOFILE, &unix.Rlimit{Max: int64(maxOpenFiles), Cur: int64(maxOpenFiles)}) +} + diff --git a/pkg/util/rlimit/rlimit_unsupported.go b/pkg/util/rlimit/rlimit_unsupported.go index 96eb9904278c..7445c2bc66a1 100644 --- a/pkg/util/rlimit/rlimit_unsupported.go +++ b/pkg/util/rlimit/rlimit_unsupported.go @@ -1,5 +1,5 @@ -//go:build !linux -// +build !linux +//go:build !linux && !freebsd +// +build !linux,!freebsd /* Copyright 2016 The Kubernetes Authors. diff --git a/pkg/volume/util/hostutil/hostutil_freebsd.go b/pkg/volume/util/hostutil/hostutil_freebsd.go new file mode 100644 index 000000000000..215296b992ce --- /dev/null +++ b/pkg/volume/util/hostutil/hostutil_freebsd.go @@ -0,0 +1,229 @@ +//go:build freebsd +// +build freebsd + +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package hostutil + +import ( + "fmt" + "os" + "path" + "path/filepath" + "strings" + "syscall" + + "golang.org/x/sys/unix" + "k8s.io/klog/v2" + "k8s.io/mount-utils" + utilpath "k8s.io/utils/path" +) + +const ( + // Location of the mountinfo file + procMountInfoPath = "/proc/self/mountinfo" +) + +// HostUtil implements HostUtils for FreeBSD platforms. 
+type HostUtil struct { +} + +// NewHostUtil returns a struct that implements the HostUtils interface on +// linux platforms +func NewHostUtil() *HostUtil { + return &HostUtil{} +} + +// DeviceOpened checks if block device in use by calling Open with O_EXCL flag. +// If pathname is not a device, log and return false with nil error. +// If open returns errno EBUSY, return true with nil error. +// If open returns nil, return false with nil error. +// Otherwise, return false with error +func (hu *HostUtil) DeviceOpened(pathname string) (bool, error) { + return ExclusiveOpenFailsOnDevice(pathname) +} + +// PathIsDevice uses FileInfo returned from os.Stat to check if path refers +// to a device. +func (hu *HostUtil) PathIsDevice(pathname string) (bool, error) { + pathType, err := hu.GetFileType(pathname) + isDevice := pathType == FileTypeCharDev || pathType == FileTypeBlockDev + return isDevice, err +} + +// ExclusiveOpenFailsOnDevice is shared with NsEnterMounter +func ExclusiveOpenFailsOnDevice(pathname string) (bool, error) { + var isDevice bool + finfo, err := os.Stat(pathname) + if os.IsNotExist(err) { + isDevice = false + } + // err in call to os.Stat + if err != nil { + return false, fmt.Errorf( + "PathIsDevice failed for path %q: %v", + pathname, + err) + } + // path refers to a device + if finfo.Mode()&os.ModeDevice != 0 { + isDevice = true + } + + if !isDevice { + klog.Errorf("Path %q is not referring to a device.", pathname) + return false, nil + } + fd, errno := unix.Open(pathname, unix.O_RDONLY|unix.O_EXCL|unix.O_CLOEXEC, 0) + // If the device is in use, open will return an invalid fd. + // When this happens, it is expected that Close will fail and throw an error. 
+ defer unix.Close(fd) + if errno == nil { + // device not in use + return false, nil + } else if errno == unix.EBUSY { + // device is in use + return true, nil + } + // error during call to Open + return false, errno +} + +// GetDeviceNameFromMount given a mount point, find the device name from its global mount point +func (hu *HostUtil) GetDeviceNameFromMount(mounter mount.Interface, mountPath, pluginMountDir string) (string, error) { + return getDeviceNameFromMount(mounter, mountPath, pluginMountDir) +} + +// getDeviceNameFromMountLinux find the device name from /proc/mounts in which +// the mount path reference should match the given plugin mount directory. In case no mount path reference +// matches, returns the volume name taken from its given mountPath +func getDeviceNameFromMount(mounter mount.Interface, mountPath, pluginMountDir string) (string, error) { + refs, err := mounter.GetMountRefs(mountPath) + if err != nil { + klog.V(4).Infof("GetMountRefs failed for mount path %q: %v", mountPath, err) + return "", err + } + if len(refs) == 0 { + klog.V(4).Infof("Directory %s is not mounted", mountPath) + return "", fmt.Errorf("directory %s is not mounted", mountPath) + } + for _, ref := range refs { + if strings.HasPrefix(ref, pluginMountDir) { + volumeID, err := filepath.Rel(pluginMountDir, ref) + if err != nil { + klog.Errorf("Failed to get volume id from mount %s - %v", mountPath, err) + return "", err + } + return volumeID, nil + } + } + + return path.Base(mountPath), nil +} + +// MakeRShared checks that given path is on a mount with 'rshared' mount +// propagation. Empty implementation here. +func (hu *HostUtil) MakeRShared(path string) error { + return nil +} + +// GetFileType checks for file/directory/socket/block/character devices. +func (hu *HostUtil) GetFileType(pathname string) (FileType, error) { + return getFileType(pathname) +} + +// PathExists tests if the given path already exists +// Error is returned on any other error than "file not found". 
+func (hu *HostUtil) PathExists(pathname string) (bool, error) { + return utilpath.Exists(utilpath.CheckFollowSymlink, pathname) +} + +// EvalHostSymlinks returns the path name after evaluating symlinks. +// TODO once the nsenter implementation is removed, this method can be removed +// from the interface and filepath.EvalSymlinks used directly +func (hu *HostUtil) EvalHostSymlinks(pathname string) (string, error) { + return filepath.EvalSymlinks(pathname) +} + +// FindMountInfo returns the mount info on the given path. +func (hu *HostUtil) FindMountInfo(path string) (mount.MountInfo, error) { + return findMountInfo(path, procMountInfoPath) +} + +func findMountInfo(path, mountInfoPath string) (mount.MountInfo, error) { + infos, err := mount.ParseMountInfo(mountInfoPath) + if err != nil { + return mount.MountInfo{}, err + } + + // process /proc/xxx/mountinfo in backward order and find the first mount + // point that is prefix of 'path' - that's the mount where path resides + var info *mount.MountInfo + for i := len(infos) - 1; i >= 0; i-- { + if mount.PathWithinBase(path, infos[i].MountPoint) { + info = &infos[i] + break + } + } + if info == nil { + return mount.MountInfo{}, fmt.Errorf("cannot find mount point for %q", path) + } + return *info, nil +} + +// selinux.SELinuxEnabled implementation for unit tests +type seLinuxEnabledFunc func() bool + +// GetSELinux is not supported on FreeBSD +func GetSELinux(path string, mountInfoFilename string, selinuxEnabled seLinuxEnabledFunc) (bool, error) { + return false, nil +} + +// GetSELinuxSupport return false on FreeBSD +func (hu *HostUtil) GetSELinuxSupport(pathname string) (bool, error) { + return false, nil +} + +// GetOwner returns the integer ID for the user and group of the given path +func (hu *HostUtil) GetOwner(pathname string) (int64, int64, error) { + realpath, err := filepath.EvalSymlinks(pathname) + if err != nil { + return -1, -1, err + } + + info, err := os.Stat(realpath) + if err != nil { + return -1, -1, 
err + } + stat := info.Sys().(*syscall.Stat_t) + return int64(stat.Uid), int64(stat.Gid), nil +} + +// GetMode returns permissions of the path. +func (hu *HostUtil) GetMode(pathname string) (os.FileMode, error) { + info, err := os.Stat(pathname) + if err != nil { + return 0, err + } + return info.Mode(), nil +} + +// GetSELinuxMountContext is not supported on FreeBSD +func (hu *HostUtil) GetSELinuxMountContext(pathname string) (string, error) { + return "", nil +} + diff --git a/pkg/volume/util/hostutil/hostutil_unsupported.go b/pkg/volume/util/hostutil/hostutil_unsupported.go index c5ff9c0b5e1e..2e644b31cb1a 100644 --- a/pkg/volume/util/hostutil/hostutil_unsupported.go +++ b/pkg/volume/util/hostutil/hostutil_unsupported.go @@ -1,5 +1,5 @@ -//go:build !linux && !windows -// +build !linux,!windows +//go:build !linux && !windows && !freebsd +// +build !linux,!windows,!freebsd /* Copyright 2014 The Kubernetes Authors. diff --git a/pkg/volume/util/subpath/subpath_freebsd.go b/pkg/volume/util/subpath/subpath_freebsd.go new file mode 100644 index 000000000000..d90d99e16167 --- /dev/null +++ b/pkg/volume/util/subpath/subpath_freebsd.go @@ -0,0 +1,83 @@ +//go:build freebsd +// +build freebsd + +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package subpath + +import ( + "errors" + "fmt" + "unsafe" + "k8s.io/mount-utils" + "k8s.io/utils/nsenter" +) + +/* +#include +#include +#include + +int openat_no_vargs(int fd, const char *path, int flags, uint32_t mode) { return openat(fd, path, flags, mode); } +*/ +import "C" + +const ( + O_PATH_PORTABLE = C.O_PATH // TODO: rebind with libc directly +) + +var errUnsupported = errors.New("util/subpath on this platform is not fully supported") + +// New returns a subpath.Interface for the current system. +func New(mount.Interface) Interface { + return &subpath{} +} + +// NewNSEnter is to satisfy the compiler for having NewSubpathNSEnter exist for all +// OS choices. however, NSEnter is only valid on Linux +func NewNSEnter(mounter mount.Interface, ne *nsenter.Nsenter, rootDir string) Interface { + return nil +} + +func (sp *subpath) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) { + return subPath.Path, nil, errUnsupported +} + +// This call is not implemented in golang unix/syscall, we need to bind +// from FreeBSD libc +func doOpenat(fd int, path string, flags int, mode uint32) (int, error) { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + res := C.openat_no_vargs(C.int(fd), cPath, C.int(flags), C.uint(mode)) + if res < 0 { + return 0, fmt.Errorf("openat failed for path %s", path) + } + return int(res), nil +} + +// This call is not implemented in golang unix/syscall, we need to bind +// from FreeBSD libc +func doMkdirat(dirfd int, path string, mode uint32) (err error) { + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + res := C.mkdirat(C.int(dirfd), cPath, C.ushort(mode)) + if res < 0 { + return fmt.Errorf("mkdirat failed for path %s", path) + } + return nil + +} diff --git a/pkg/volume/util/subpath/subpath_linux.go b/pkg/volume/util/subpath/subpath_linux.go index 583939bdc4a0..e22a0d2d1efe 100644 --- a/pkg/volume/util/subpath/subpath_linux.go +++ 
b/pkg/volume/util/subpath/subpath_linux.go @@ -25,7 +25,6 @@ import ( "os" "path/filepath" "strconv" - "strings" "syscall" "golang.org/x/sys/unix" @@ -34,19 +33,9 @@ import ( ) const ( - // place for subpath mounts - // TODO: pass in directory using kubelet_getters instead - containerSubPathDirectoryName = "volume-subpaths" - // syscall.Openat flags used to traverse directories not following symlinks - nofollowFlags = unix.O_RDONLY | unix.O_NOFOLLOW - // flags for getting file descriptor without following the symlink - openFDFlags = unix.O_NOFOLLOW | unix.O_PATH + O_PATH_PORTABLE = unix.O_PATH ) -type subpath struct { - mounter mount.Interface -} - // New returns a subpath.Interface for the current system func New(mounter mount.Interface) Interface { return &subpath{ @@ -54,21 +43,6 @@ func New(mounter mount.Interface) Interface { } } -func (sp *subpath) CleanSubPaths(podDir string, volumeName string) error { - return doCleanSubPaths(sp.mounter, podDir, volumeName) -} - -func (sp *subpath) SafeMakeDir(subdir string, base string, perm os.FileMode) error { - realBase, err := filepath.EvalSymlinks(base) - if err != nil { - return fmt.Errorf("error resolving symlinks in %s: %s", base, err) - } - - realFullPath := filepath.Join(realBase, subdir) - - return doSafeMakeDir(realFullPath, realBase, perm) -} - func (sp *subpath) PrepareSafeSubpath(subPath Subpath) (newHostPath string, cleanupAction func(), err error) { newHostPath, err = doBindSubPath(sp.mounter, subPath) @@ -237,92 +211,6 @@ func doBindSubPath(mounter mount.Interface, subpath Subpath) (hostPath string, e return bindPathTarget, nil } -// This implementation is shared between Linux and NsEnter -func doCleanSubPaths(mounter mount.Interface, podDir string, volumeName string) error { - // scan /var/lib/kubelet/pods//volume-subpaths//* - subPathDir := filepath.Join(podDir, containerSubPathDirectoryName, volumeName) - klog.V(4).Infof("Cleaning up subpath mounts for %s", subPathDir) - - containerDirs, err := 
ioutil.ReadDir(subPathDir) - if err != nil { - if os.IsNotExist(err) { - return nil - } - return fmt.Errorf("error reading %s: %s", subPathDir, err) - } - - for _, containerDir := range containerDirs { - if !containerDir.IsDir() { - klog.V(4).Infof("Container file is not a directory: %s", containerDir.Name()) - continue - } - klog.V(4).Infof("Cleaning up subpath mounts for container %s", containerDir.Name()) - - // scan /var/lib/kubelet/pods//volume-subpaths///* - fullContainerDirPath := filepath.Join(subPathDir, containerDir.Name()) - // The original traversal method here was ReadDir, which was not so robust to handle some error such as "stale NFS file handle", - // so it was replaced with filepath.Walk in a later patch, which can pass through error and handled by the callback WalkFunc. - // After go 1.16, WalkDir was introduced, it's more effective than Walk because the callback WalkDirFunc is called before - // reading a directory, making it save some time when a container's subPath contains lots of dirs. - // See https://github.com/kubernetes/kubernetes/pull/71804 and https://github.com/kubernetes/kubernetes/issues/107667 for more details. - err = filepath.WalkDir(fullContainerDirPath, func(path string, info os.DirEntry, _ error) error { - if path == fullContainerDirPath { - // Skip top level directory - return nil - } - - // pass through errors and let doCleanSubPath handle them - if err = doCleanSubPath(mounter, fullContainerDirPath, filepath.Base(path)); err != nil { - return err - } - - // We need to check that info is not nil. This may happen when the incoming err is not nil due to stale mounts or permission errors. 
- if info != nil && info.IsDir() { - // skip subdirs of the volume: it only matters the first level to unmount, otherwise it would try to unmount subdir of the volume - return filepath.SkipDir - } - - return nil - }) - if err != nil { - return fmt.Errorf("error processing %s: %s", fullContainerDirPath, err) - } - - // Whole container has been processed, remove its directory. - if err := os.Remove(fullContainerDirPath); err != nil { - return fmt.Errorf("error deleting %s: %s", fullContainerDirPath, err) - } - klog.V(5).Infof("Removed %s", fullContainerDirPath) - } - // Whole pod volume subpaths have been cleaned up, remove its subpath directory. - if err := os.Remove(subPathDir); err != nil { - return fmt.Errorf("error deleting %s: %s", subPathDir, err) - } - klog.V(5).Infof("Removed %s", subPathDir) - - // Remove entire subpath directory if it's the last one - podSubPathDir := filepath.Join(podDir, containerSubPathDirectoryName) - if err := os.Remove(podSubPathDir); err != nil && !os.IsExist(err) { - return fmt.Errorf("error deleting %s: %s", podSubPathDir, err) - } - klog.V(5).Infof("Removed %s", podSubPathDir) - return nil -} - -// doCleanSubPath tears down the single subpath bind mount -func doCleanSubPath(mounter mount.Interface, fullContainerDirPath, subPathIndex string) error { - // process /var/lib/kubelet/pods//volume-subpaths/// - klog.V(4).Infof("Cleaning up subpath mounts for subpath %v", subPathIndex) - fullSubPath := filepath.Join(fullContainerDirPath, subPathIndex) - - if err := mount.CleanupMountPoint(fullSubPath, mounter, true); err != nil { - return fmt.Errorf("error cleaning subpath mount %s: %s", fullSubPath, err) - } - - klog.V(4).Infof("Successfully cleaned subpath directory %s", fullSubPath) - return nil -} - // cleanSubPath will teardown the subpath bind mount and any remove any directories if empty func cleanSubPath(mounter mount.Interface, subpath Subpath) error { containerDir := filepath.Join(subpath.PodDir, containerSubPathDirectoryName, 
subpath.VolumeName, subpath.ContainerName) @@ -372,238 +260,10 @@ func removeEmptyDirs(baseDir, endDir string) error { return nil } -// This implementation is shared between Linux and NsEnterMounter. Both pathname -// and base must be either already resolved symlinks or thet will be resolved in -// kubelet's mount namespace (in case it runs containerized). -func doSafeMakeDir(pathname string, base string, perm os.FileMode) error { - klog.V(4).Infof("Creating directory %q within base %q", pathname, base) - - if !mount.PathWithinBase(pathname, base) { - return fmt.Errorf("path %s is outside of allowed base %s", pathname, base) - } - - // Quick check if the directory already exists - s, err := os.Stat(pathname) - if err == nil { - // Path exists - if s.IsDir() { - // The directory already exists. It can be outside of the parent, - // but there is no race-proof check. - klog.V(4).Infof("Directory %s already exists", pathname) - return nil - } - return &os.PathError{Op: "mkdir", Path: pathname, Err: syscall.ENOTDIR} - } - - // Find all existing directories - existingPath, toCreate, err := findExistingPrefix(base, pathname) - if err != nil { - return fmt.Errorf("error opening directory %s: %s", pathname, err) - } - // Ensure the existing directory is inside allowed base - fullExistingPath, err := filepath.EvalSymlinks(existingPath) - if err != nil { - return fmt.Errorf("error opening directory %s: %s", existingPath, err) - } - if !mount.PathWithinBase(fullExistingPath, base) { - return fmt.Errorf("path %s is outside of allowed base %s", fullExistingPath, err) - } - - klog.V(4).Infof("%q already exists, %q to create", fullExistingPath, filepath.Join(toCreate...)) - parentFD, err := doSafeOpen(fullExistingPath, base) - if err != nil { - return fmt.Errorf("cannot open directory %s: %s", existingPath, err) - } - childFD := -1 - defer func() { - if parentFD != -1 { - if err = syscall.Close(parentFD); err != nil { - klog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", 
parentFD, pathname, err) - } - } - if childFD != -1 { - if err = syscall.Close(childFD); err != nil { - klog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", childFD, pathname, err) - } - } - }() - - currentPath := fullExistingPath - // create the directories one by one, making sure nobody can change - // created directory into symlink. - for _, dir := range toCreate { - currentPath = filepath.Join(currentPath, dir) - klog.V(4).Infof("Creating %s", dir) - err = syscall.Mkdirat(parentFD, currentPath, uint32(perm)) - if err != nil { - return fmt.Errorf("cannot create directory %s: %s", currentPath, err) - } - // Dive into the created directory - childFD, err = syscall.Openat(parentFD, dir, nofollowFlags|unix.O_CLOEXEC, 0) - if err != nil { - return fmt.Errorf("cannot open %s: %s", currentPath, err) - } - // We can be sure that childFD is safe to use. It could be changed - // by user after Mkdirat() and before Openat(), however: - // - it could not be changed to symlink - we use nofollowFlags - // - it could be changed to a file (or device, pipe, socket, ...) - // but either subsequent Mkdirat() fails or we mount this file - // to user's container. Security is no violated in both cases - // and user either gets error or the file that it can already access. - - if err = syscall.Close(parentFD); err != nil { - klog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", parentFD, pathname, err) - } - parentFD = childFD - childFD = -1 - - // Everything was created. mkdirat(..., perm) above was affected by current - // umask and we must apply the right permissions to the all created directory. - // (that's the one that will be available to the container as subpath) - // so user can read/write it. - // parentFD is the last created directory. 
- - // Translate perm (os.FileMode) to uint32 that fchmod() expects - kernelPerm := uint32(perm & os.ModePerm) - if perm&os.ModeSetgid > 0 { - kernelPerm |= syscall.S_ISGID - } - if perm&os.ModeSetuid > 0 { - kernelPerm |= syscall.S_ISUID - } - if perm&os.ModeSticky > 0 { - kernelPerm |= syscall.S_ISVTX - } - if err = syscall.Fchmod(parentFD, kernelPerm); err != nil { - return fmt.Errorf("chmod %q failed: %s", currentPath, err) - } - } - - return nil -} - -// findExistingPrefix finds prefix of pathname that exists. In addition, it -// returns list of remaining directories that don't exist yet. -func findExistingPrefix(base, pathname string) (string, []string, error) { - rel, err := filepath.Rel(base, pathname) - if err != nil { - return base, nil, err - } - dirs := strings.Split(rel, string(filepath.Separator)) - - // Do OpenAt in a loop to find the first non-existing dir. Resolve symlinks. - // This should be faster than looping through all dirs and calling os.Stat() - // on each of them, as the symlinks are resolved only once with OpenAt(). 
- currentPath := base - fd, err := syscall.Open(currentPath, syscall.O_RDONLY|syscall.O_CLOEXEC, 0) - if err != nil { - return pathname, nil, fmt.Errorf("error opening %s: %s", currentPath, err) - } - defer func() { - if err = syscall.Close(fd); err != nil { - klog.V(4).Infof("Closing FD %v failed for findExistingPrefix(%v): %v", fd, pathname, err) - } - }() - for i, dir := range dirs { - // Using O_PATH here will prevent hangs in case user replaces directory with - // fifo - childFD, err := syscall.Openat(fd, dir, unix.O_PATH|unix.O_CLOEXEC, 0) - if err != nil { - if os.IsNotExist(err) { - return currentPath, dirs[i:], nil - } - return base, nil, err - } - if err = syscall.Close(fd); err != nil { - klog.V(4).Infof("Closing FD %v failed for findExistingPrefix(%v): %v", fd, pathname, err) - } - fd = childFD - currentPath = filepath.Join(currentPath, dir) - } - return pathname, []string{}, nil +func doMkdirat(dirfd int, path string, mode uint32) (err error) { + return syscall.Mkdirat(dirfd, path, mode) } -// This implementation is shared between Linux and NsEnterMounter -// Open path and return its fd. -// Symlinks are disallowed (pathname must already resolve symlinks), -// and the path must be within the base directory. -func doSafeOpen(pathname string, base string) (int, error) { - pathname = filepath.Clean(pathname) - base = filepath.Clean(base) - - // Calculate segments to follow - subpath, err := filepath.Rel(base, pathname) - if err != nil { - return -1, err - } - segments := strings.Split(subpath, string(filepath.Separator)) - - // Assumption: base is the only directory that we have under control. - // Base dir is not allowed to be a symlink. 
- parentFD, err := syscall.Open(base, nofollowFlags|unix.O_CLOEXEC, 0) - if err != nil { - return -1, fmt.Errorf("cannot open directory %s: %s", base, err) - } - defer func() { - if parentFD != -1 { - if err = syscall.Close(parentFD); err != nil { - klog.V(4).Infof("Closing FD %v failed for safeopen(%v): %v", parentFD, pathname, err) - } - } - }() - - childFD := -1 - defer func() { - if childFD != -1 { - if err = syscall.Close(childFD); err != nil { - klog.V(4).Infof("Closing FD %v failed for safeopen(%v): %v", childFD, pathname, err) - } - } - }() - - currentPath := base - - // Follow the segments one by one using openat() to make - // sure the user cannot change already existing directories into symlinks. - for _, seg := range segments { - var deviceStat unix.Stat_t - - currentPath = filepath.Join(currentPath, seg) - if !mount.PathWithinBase(currentPath, base) { - return -1, fmt.Errorf("path %s is outside of allowed base %s", currentPath, base) - } - - // Trigger auto mount if it's an auto-mounted directory, ignore error if not a directory. - // Notice the trailing slash is mandatory, see "automount" in openat(2) and open_by_handle_at(2). 
- unix.Fstatat(parentFD, seg+"/", &deviceStat, unix.AT_SYMLINK_NOFOLLOW) - - klog.V(5).Infof("Opening path %s", currentPath) - childFD, err = syscall.Openat(parentFD, seg, openFDFlags|unix.O_CLOEXEC, 0) - if err != nil { - return -1, fmt.Errorf("cannot open %s: %s", currentPath, err) - } - - err := unix.Fstat(childFD, &deviceStat) - if err != nil { - return -1, fmt.Errorf("error running fstat on %s with %v", currentPath, err) - } - fileFmt := deviceStat.Mode & syscall.S_IFMT - if fileFmt == syscall.S_IFLNK { - return -1, fmt.Errorf("unexpected symlink found %s", currentPath) - } - - // Close parentFD - if err = syscall.Close(parentFD); err != nil { - return -1, fmt.Errorf("closing fd for %q failed: %v", filepath.Dir(currentPath), err) - } - // Set child to new parent - parentFD = childFD - childFD = -1 - } - - // We made it to the end, return this fd, don't close it - finalFD := parentFD - parentFD = -1 - - return finalFD, nil +func doOpenat(fd int, path string, flags int, mode uint32) (int, error) { + return syscall.Openat(fd, path, flags, mode) } diff --git a/pkg/volume/util/subpath/subpath_unix.go b/pkg/volume/util/subpath/subpath_unix.go new file mode 100644 index 000000000000..f62cd20f0d65 --- /dev/null +++ b/pkg/volume/util/subpath/subpath_unix.go @@ -0,0 +1,384 @@ +//go:build linux || freebsd +// +build linux freebsd + +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package subpath
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"strings"
+	"syscall"
+
+	"golang.org/x/sys/unix"
+	"k8s.io/klog/v2"
+	"k8s.io/mount-utils"
+)
+
+const (
+	// place for subpath mounts
+	// TODO: pass in directory using kubelet_getters instead
+	containerSubPathDirectoryName = "volume-subpaths"
+	// syscall.Openat flags used to traverse directories not following symlinks
+	nofollowFlags = unix.O_RDONLY | unix.O_NOFOLLOW
+	// flags for getting file descriptor without following the symlink
+	openFDFlags = unix.O_NOFOLLOW | O_PATH_PORTABLE
+)
+
+type subpath struct {
+	mounter mount.Interface
+}
+
+func (sp *subpath) CleanSubPaths(podDir string, volumeName string) error {
+	return doCleanSubPaths(sp.mounter, podDir, volumeName)
+}
+
+func (sp *subpath) SafeMakeDir(subdir string, base string, perm os.FileMode) error {
+	realBase, err := filepath.EvalSymlinks(base)
+	if err != nil {
+		return fmt.Errorf("error resolving symlinks in %s: %s", base, err)
+	}
+
+	realFullPath := filepath.Join(realBase, subdir)
+
+	return doSafeMakeDir(realFullPath, realBase, perm)
+}
+
+// This implementation is shared between Linux and NsEnter
+func doCleanSubPaths(mounter mount.Interface, podDir string, volumeName string) error {
+	// scan /var/lib/kubelet/pods/<uid>/volume-subpaths/<volume>/*
+	subPathDir := filepath.Join(podDir, containerSubPathDirectoryName, volumeName)
+	klog.V(4).Infof("Cleaning up subpath mounts for %s", subPathDir)
+
+	containerDirs, err := ioutil.ReadDir(subPathDir)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil
+		}
+		return fmt.Errorf("error reading %s: %s", subPathDir, err)
+	}
+
+	for _, containerDir := range containerDirs {
+		if !containerDir.IsDir() {
+			klog.V(4).Infof("Container file is not a directory: %s", containerDir.Name())
+			continue
+		}
+		klog.V(4).Infof("Cleaning up subpath mounts for container %s", containerDir.Name())
+
+		// scan /var/lib/kubelet/pods/<uid>/volume-subpaths/<volume>/<container name>/*
+		fullContainerDirPath := filepath.Join(subPathDir, containerDir.Name())
+		// The original traversal method here was ReadDir, which was not so robust to handle some error such as "stale NFS file handle",
+		// so it was replaced with filepath.Walk in a later patch, which can pass through error and handled by the callback WalkFunc.
+		// After go 1.16, WalkDir was introduced, it's more effective than Walk because the callback WalkDirFunc is called before
+		// reading a directory, making it save some time when a container's subPath contains lots of dirs.
+		// See https://github.com/kubernetes/kubernetes/pull/71804 and https://github.com/kubernetes/kubernetes/issues/107667 for more details.
+		err = filepath.WalkDir(fullContainerDirPath, func(path string, info os.DirEntry, _ error) error {
+			if path == fullContainerDirPath {
+				// Skip top level directory
+				return nil
+			}
+
+			// pass through errors and let doCleanSubPath handle them
+			if err = doCleanSubPath(mounter, fullContainerDirPath, filepath.Base(path)); err != nil {
+				return err
+			}
+
+			// We need to check that info is not nil. This may happen when the incoming err is not nil due to stale mounts or permission errors.
+			if info != nil && info.IsDir() {
+				// skip subdirs of the volume: it only matters the first level to unmount, otherwise it would try to unmount subdir of the volume
+				return filepath.SkipDir
+			}
+
+			return nil
+		})
+		if err != nil {
+			return fmt.Errorf("error processing %s: %s", fullContainerDirPath, err)
+		}
+
+		// Whole container has been processed, remove its directory.
+		if err := os.Remove(fullContainerDirPath); err != nil {
+			return fmt.Errorf("error deleting %s: %s", fullContainerDirPath, err)
+		}
+		klog.V(5).Infof("Removed %s", fullContainerDirPath)
+	}
+	// Whole pod volume subpaths have been cleaned up, remove its subpath directory.
+	if err := os.Remove(subPathDir); err != nil {
+		return fmt.Errorf("error deleting %s: %s", subPathDir, err)
+	}
+	klog.V(5).Infof("Removed %s", subPathDir)
+
+	// Remove entire subpath directory if it's the last one
+	podSubPathDir := filepath.Join(podDir, containerSubPathDirectoryName)
+	if err := os.Remove(podSubPathDir); err != nil && !os.IsExist(err) {
+		return fmt.Errorf("error deleting %s: %s", podSubPathDir, err)
+	}
+	klog.V(5).Infof("Removed %s", podSubPathDir)
+	return nil
+}
+
+// doCleanSubPath tears down the single subpath bind mount
+func doCleanSubPath(mounter mount.Interface, fullContainerDirPath, subPathIndex string) error {
+	// process /var/lib/kubelet/pods/<uid>/volume-subpaths/<volume>/<container name>/<subPathName>
+	klog.V(4).Infof("Cleaning up subpath mounts for subpath %v", subPathIndex)
+	fullSubPath := filepath.Join(fullContainerDirPath, subPathIndex)
+
+	if err := mount.CleanupMountPoint(fullSubPath, mounter, true); err != nil {
+		return fmt.Errorf("error cleaning subpath mount %s: %s", fullSubPath, err)
+	}
+
+	klog.V(4).Infof("Successfully cleaned subpath directory %s", fullSubPath)
+	return nil
+}
+
+// This implementation is shared between Linux and NsEnterMounter. Both pathname
+// and base must be either already resolved symlinks or they will be resolved in
+// kubelet's mount namespace (in case it runs containerized).
+func doSafeMakeDir(pathname string, base string, perm os.FileMode) error {
+	klog.V(4).Infof("Creating directory %q within base %q", pathname, base)
+
+	if !mount.PathWithinBase(pathname, base) {
+		return fmt.Errorf("path %s is outside of allowed base %s", pathname, base)
+	}
+
+	// Quick check if the directory already exists
+	s, err := os.Stat(pathname)
+	if err == nil {
+		// Path exists
+		if s.IsDir() {
+			// The directory already exists. It can be outside of the parent,
+			// but there is no race-proof check.
+			klog.V(4).Infof("Directory %s already exists", pathname)
+			return nil
+		}
+		return &os.PathError{Op: "mkdir", Path: pathname, Err: syscall.ENOTDIR}
+	}
+
+	// Find all existing directories
+	existingPath, toCreate, err := findExistingPrefix(base, pathname)
+	if err != nil {
+		return fmt.Errorf("error opening directory %s: %s", pathname, err)
+	}
+	// Ensure the existing directory is inside allowed base
+	fullExistingPath, err := filepath.EvalSymlinks(existingPath)
+	if err != nil {
+		return fmt.Errorf("error opening directory %s: %s", existingPath, err)
+	}
+	if !mount.PathWithinBase(fullExistingPath, base) {
+		return fmt.Errorf("path %s is outside of allowed base %s", fullExistingPath, base)
+	}
+
+	klog.V(4).Infof("%q already exists, %q to create", fullExistingPath, filepath.Join(toCreate...))
+	parentFD, err := doSafeOpen(fullExistingPath, base)
+	if err != nil {
+		return fmt.Errorf("cannot open directory %s: %s", existingPath, err)
+	}
+	childFD := -1
+	defer func() {
+		if parentFD != -1 {
+			if err = syscall.Close(parentFD); err != nil {
+				klog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", parentFD, pathname, err)
+			}
+		}
+		if childFD != -1 {
+			if err = syscall.Close(childFD); err != nil {
+				klog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", childFD, pathname, err)
+			}
+		}
+	}()
+
+	currentPath := fullExistingPath
+	// create the directories one by one, making sure nobody can change
+	// created directory into symlink.
+	for _, dir := range toCreate {
+		currentPath = filepath.Join(currentPath, dir)
+		klog.V(4).Infof("Creating %s", dir)
+		err = doMkdirat(parentFD, currentPath, uint32(perm))
+		if err != nil {
+			return fmt.Errorf("cannot create directory %s: %s", currentPath, err)
+		}
+		// Dive into the created directory
+		childFD, err = doOpenat(parentFD, dir, nofollowFlags|unix.O_CLOEXEC, 0)
+		if err != nil {
+			return fmt.Errorf("cannot open %s: %s", currentPath, err)
+		}
+		// We can be sure that childFD is safe to use. It could be changed
+		// by user after Mkdirat() and before Openat(), however:
+		// - it could not be changed to symlink - we use nofollowFlags
+		// - it could be changed to a file (or device, pipe, socket, ...)
+		//   but either subsequent Mkdirat() fails or we mount this file
+		//   to user's container. Security is not violated in both cases
+		//   and user either gets error or the file that it can already access.
+
+		if err = syscall.Close(parentFD); err != nil {
+			klog.V(4).Infof("Closing FD %v failed for safemkdir(%v): %v", parentFD, pathname, err)
+		}
+		parentFD = childFD
+		childFD = -1
+
+		// The directory was created. mkdirat(..., perm) above was affected by current
+		// umask and we must apply the right permissions to each created directory
+		// (the last one is what will be available to the container as subpath)
+		// so user can read/write it.
+		// parentFD is the last created directory.
+
+		// Translate perm (os.FileMode) to uint32 that fchmod() expects
+		kernelPerm := uint32(perm & os.ModePerm)
+		if perm&os.ModeSetgid > 0 {
+			kernelPerm |= syscall.S_ISGID
+		}
+		if perm&os.ModeSetuid > 0 {
+			kernelPerm |= syscall.S_ISUID
+		}
+		if perm&os.ModeSticky > 0 {
+			kernelPerm |= syscall.S_ISVTX
+		}
+		if err = syscall.Fchmod(parentFD, kernelPerm); err != nil {
+			return fmt.Errorf("chmod %q failed: %s", currentPath, err)
+		}
+	}
+
+	return nil
+}
+
+// findExistingPrefix finds prefix of pathname that exists. In addition, it
+// returns list of remaining directories that don't exist yet.
+func findExistingPrefix(base, pathname string) (string, []string, error) {
+	rel, err := filepath.Rel(base, pathname)
+	if err != nil {
+		return base, nil, err
+	}
+	dirs := strings.Split(rel, string(filepath.Separator))
+
+	// Do OpenAt in a loop to find the first non-existing dir. Resolve symlinks.
+	// This should be faster than looping through all dirs and calling os.Stat()
+	// on each of them, as the symlinks are resolved only once with OpenAt().
+	currentPath := base
+	fd, err := syscall.Open(currentPath, syscall.O_RDONLY|syscall.O_CLOEXEC, 0)
+	if err != nil {
+		return pathname, nil, fmt.Errorf("error opening %s: %s", currentPath, err)
+	}
+	defer func() {
+		if err = syscall.Close(fd); err != nil {
+			klog.V(4).Infof("Closing FD %v failed for findExistingPrefix(%v): %v", fd, pathname, err)
+		}
+	}()
+	for i, dir := range dirs {
+		// Using O_PATH here will prevent hangs in case user replaces directory with
+		// fifo
+		childFD, err := doOpenat(fd, dir, O_PATH_PORTABLE|unix.O_CLOEXEC, 0)
+		if err != nil {
+			if os.IsNotExist(err) {
+				return currentPath, dirs[i:], nil
+			}
+			return base, nil, err
+		}
+		if err = syscall.Close(fd); err != nil {
+			klog.V(4).Infof("Closing FD %v failed for findExistingPrefix(%v): %v", fd, pathname, err)
+		}
+		fd = childFD
+		currentPath = filepath.Join(currentPath, dir)
+	}
+	return pathname, []string{}, nil
+}
+
+// This implementation is shared between Linux and NsEnterMounter
+// Open path and return its fd.
+// Symlinks are disallowed (pathname must already resolve symlinks),
+// and the path must be within the base directory.
+func doSafeOpen(pathname string, base string) (int, error) {
+	pathname = filepath.Clean(pathname)
+	base = filepath.Clean(base)
+
+	// Calculate segments to follow
+	subpath, err := filepath.Rel(base, pathname)
+	if err != nil {
+		return -1, err
+	}
+	segments := strings.Split(subpath, string(filepath.Separator))
+
+	// Assumption: base is the only directory that we have under control.
+	// Base dir is not allowed to be a symlink.
+	parentFD, err := syscall.Open(base, nofollowFlags|unix.O_CLOEXEC, 0)
+	if err != nil {
+		return -1, fmt.Errorf("cannot open directory %s: %s", base, err)
+	}
+	defer func() {
+		if parentFD != -1 {
+			if err = syscall.Close(parentFD); err != nil {
+				klog.V(4).Infof("Closing FD %v failed for safeopen(%v): %v", parentFD, pathname, err)
+			}
+		}
+	}()
+
+	childFD := -1
+	defer func() {
+		if childFD != -1 {
+			if err = syscall.Close(childFD); err != nil {
+				klog.V(4).Infof("Closing FD %v failed for safeopen(%v): %v", childFD, pathname, err)
+			}
+		}
+	}()
+
+	currentPath := base
+
+	// Follow the segments one by one using openat() to make
+	// sure the user cannot change already existing directories into symlinks.
+	for _, seg := range segments {
+		var deviceStat unix.Stat_t
+
+		currentPath = filepath.Join(currentPath, seg)
+		if !mount.PathWithinBase(currentPath, base) {
+			return -1, fmt.Errorf("path %s is outside of allowed base %s", currentPath, base)
+		}
+
+		// Trigger auto mount if it's an auto-mounted directory, ignore error if not a directory.
+		// Notice the trailing slash is mandatory, see "automount" in openat(2) and open_by_handle_at(2).
+		unix.Fstatat(parentFD, seg+"/", &deviceStat, unix.AT_SYMLINK_NOFOLLOW)
+
+		klog.V(5).Infof("Opening path %s", currentPath)
+		childFD, err = doOpenat(parentFD, seg, openFDFlags|unix.O_CLOEXEC, 0)
+		if err != nil {
+			return -1, fmt.Errorf("cannot open %s: %s", currentPath, err)
+		}
+
+		err := unix.Fstat(childFD, &deviceStat)
+		if err != nil {
+			return -1, fmt.Errorf("error running fstat on %s with %v", currentPath, err)
+		}
+		fileFmt := deviceStat.Mode & syscall.S_IFMT
+		if fileFmt == syscall.S_IFLNK {
+			return -1, fmt.Errorf("unexpected symlink found %s", currentPath)
+		}
+
+		// Close parentFD
+		if err = syscall.Close(parentFD); err != nil {
+			return -1, fmt.Errorf("closing fd for %q failed: %v", filepath.Dir(currentPath), err)
+		}
+		// Set child to new parent
+		parentFD = childFD
+		childFD = -1
+	}
+
+	// We made it to the end, return this fd, don't close it
+	finalFD := parentFD
+	parentFD = -1
+
+	return finalFD, nil
+}
diff --git a/pkg/volume/util/subpath/subpath_unsupported.go b/pkg/volume/util/subpath/subpath_unsupported.go
index 21493426da8f..32ba50ed7002 100644
--- a/pkg/volume/util/subpath/subpath_unsupported.go
+++ b/pkg/volume/util/subpath/subpath_unsupported.go
@@ -1,5 +1,5 @@
-//go:build !linux && !windows
-// +build !linux,!windows
+//go:build !linux && !windows && !freebsd
+// +build !linux,!windows,!freebsd
 
 /*
 Copyright 2014 The Kubernetes Authors.
diff --git a/staging/src/k8s.io/mount-utils/mount_helper_freebsd.go b/staging/src/k8s.io/mount-utils/mount_helper_freebsd.go
new file mode 100644
index 000000000000..4cb71f3bd616
--- /dev/null
+++ b/staging/src/k8s.io/mount-utils/mount_helper_freebsd.go
@@ -0,0 +1,56 @@
+//go:build freebsd
+// +build freebsd
+
+/*
+Copyright 2019 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package mount + +// MountInfo is a pure copy of the linux one, for now +type MountInfo struct { // nolint: golint + // Unique ID for the mount (maybe reused after umount). + ID int + // The ID of the parent mount (or of self for the root of this mount namespace's mount tree). + ParentID int + // Major indicates one half of the device ID which identifies the device class + // (parsed from `st_dev` for files on this filesystem). + Major int + // Minor indicates one half of the device ID which identifies a specific + // instance of device (parsed from `st_dev` for files on this filesystem). + Minor int + // The pathname of the directory in the filesystem which forms the root of this mount. + Root string + // Mount source, filesystem-specific information. e.g. device, tmpfs name. + Source string + // Mount point, the pathname of the mount point. + MountPoint string + // Optional fieds, zero or more fields of the form "tag[:value]". + OptionalFields []string + // The filesystem type in the form "type[.subtype]". + FsType string + // Per-mount options. + MountOptions []string + // Per-superblock options. 
+ SuperOptions []string +} + +// ParseMountInfo is not implemented on FreeBSD yet: it ignores filename and always returns an empty slice and a nil error. TODO: implement if needed (FreeBSD may offer a mountinfo equivalent; to be investigated). +func ParseMountInfo(filename string) ([]MountInfo, error) { + + infos := []MountInfo{} + + return infos, nil +} \ No newline at end of file diff --git a/staging/src/k8s.io/mount-utils/mount_helper_linux.go b/staging/src/k8s.io/mount-utils/mount_helper_linux.go new file mode 100644 index 000000000000..c4df2ad5ee1d --- /dev/null +++ b/staging/src/k8s.io/mount-utils/mount_helper_linux.go @@ -0,0 +1,151 @@ +//go:build linux +// +build linux + +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package mount + +import ( + "fmt" + "strconv" + "strings" + + utilio "k8s.io/utils/io" +) + +const ( + // At least number of fields per line in /proc/<pid>/mountinfo. + expectedAtLeastNumFieldsPerMountInfo = 10 + // How many times to retry for a consistent read of /proc/mounts. + maxListTries = 10 +) + +// MountInfo represents a single line in /proc/<pid>/mountinfo. +type MountInfo struct { // nolint: golint + // Unique ID for the mount (maybe reused after umount). + ID int + // The ID of the parent mount (or of self for the root of this mount namespace's mount tree). + ParentID int + // Major indicates one half of the device ID which identifies the device class + // (parsed from `st_dev` for files on this filesystem). 
+ Major int + // Minor indicates one half of the device ID which identifies a specific + // instance of device (parsed from `st_dev` for files on this filesystem). + Minor int + // The pathname of the directory in the filesystem which forms the root of this mount. + Root string + // Mount source, filesystem-specific information. e.g. device, tmpfs name. + Source string + // Mount point, the pathname of the mount point. + MountPoint string + // Optional fields, zero or more fields of the form "tag[:value]". + OptionalFields []string + // The filesystem type in the form "type[.subtype]". + FsType string + // Per-mount options. + MountOptions []string + // Per-superblock options. + SuperOptions []string +} + +// ParseMountInfo parses /proc/xxx/mountinfo. +func ParseMountInfo(filename string) ([]MountInfo, error) { + content, err := utilio.ConsistentRead(filename, maxListTries) + if err != nil { + return []MountInfo{}, err + } + contentStr := string(content) + infos := []MountInfo{} + + for _, line := range strings.Split(contentStr, "\n") { + if line == "" { + // the last split() item is empty string following the last \n + continue + } + // See `man proc` for authoritative description of format of the file. 
+ fields := strings.Fields(line) + if len(fields) < expectedAtLeastNumFieldsPerMountInfo { + return nil, fmt.Errorf("wrong number of fields in (expected at least %d, got %d): %s", expectedAtLeastNumFieldsPerMountInfo, len(fields), line) + } + id, err := strconv.Atoi(fields[0]) + if err != nil { + return nil, err + } + parentID, err := strconv.Atoi(fields[1]) + if err != nil { + return nil, err + } + mm := strings.Split(fields[2], ":") + if len(mm) != 2 { + return nil, fmt.Errorf("parsing '%s' failed: unexpected minor:major pair %s", line, mm) + } + major, err := strconv.Atoi(mm[0]) + if err != nil { + return nil, fmt.Errorf("parsing '%s' failed: unable to parse major device id, err:%v", mm[0], err) + } + minor, err := strconv.Atoi(mm[1]) + if err != nil { + return nil, fmt.Errorf("parsing '%s' failed: unable to parse minor device id, err:%v", mm[1], err) + } + + info := MountInfo{ + ID: id, + ParentID: parentID, + Major: major, + Minor: minor, + Root: fields[3], + MountPoint: fields[4], + MountOptions: splitMountOptions(fields[5]), + } + // All fields until "-" are "optional fields". + i := 6 + for ; i < len(fields) && fields[i] != "-"; i++ { + info.OptionalFields = append(info.OptionalFields, fields[i]) + } + // Parse the rest 3 fields. + i++ + if len(fields)-i < 3 { + return nil, fmt.Errorf("expect 3 fields in %s, got %d", line, len(fields)-i) + } + info.FsType = fields[i] + info.Source = fields[i+1] + info.SuperOptions = splitMountOptions(fields[i+2]) + infos = append(infos, info) + } + return infos, nil +} + +// splitMountOptions parses comma-separated list of mount options into an array. +// It respects double quotes - commas in them are not considered as the option separator. +func splitMountOptions(s string) []string { + inQuotes := false + list := strings.FieldsFunc(s, func(r rune) bool { + if r == '"' { + inQuotes = !inQuotes + } + // Report a new field only when outside of double quotes. 
+ return r == ',' && !inQuotes + }) + return list +} + +// isMountPointMatch returns true if the path in mp is the same as dir. +// Handles case where mountpoint dir has been renamed due to stale NFS mount. +func isMountPointMatch(mp MountPoint, dir string) bool { + deletedDir := fmt.Sprintf("%s\\040(deleted)", dir) + return ((mp.Path == dir) || (mp.Path == deletedDir)) +} diff --git a/staging/src/k8s.io/mount-utils/mount_helper_unix_test.go b/staging/src/k8s.io/mount-utils/mount_helper_linux_test.go similarity index 99% rename from staging/src/k8s.io/mount-utils/mount_helper_unix_test.go rename to staging/src/k8s.io/mount-utils/mount_helper_linux_test.go index 6fbc79e3e4d0..738be863ef04 100644 --- a/staging/src/k8s.io/mount-utils/mount_helper_unix_test.go +++ b/staging/src/k8s.io/mount-utils/mount_helper_linux_test.go @@ -1,5 +1,5 @@ -//go:build !windows -// +build !windows +//go:build linux +// +build linux /* Copyright 2019 The Kubernetes Authors. diff --git a/staging/src/k8s.io/mount-utils/mount_helper_unix.go b/staging/src/k8s.io/mount-utils/mount_helper_unix.go index cb8732fce74c..5d70db9a4bce 100644 --- a/staging/src/k8s.io/mount-utils/mount_helper_unix.go +++ b/staging/src/k8s.io/mount-utils/mount_helper_unix.go @@ -21,22 +21,11 @@ package mount import ( "errors" - "fmt" "io/fs" "os" - "strconv" - "strings" "syscall" "k8s.io/klog/v2" - utilio "k8s.io/utils/io" -) - -const ( - // At least number of fields per line in /proc/<pid>/mountinfo. - expectedAtLeastNumFieldsPerMountInfo = 10 - // How many times to retry for a consistent read of /proc/mounts. - maxListTries = 10 ) // IsCorruptedMnt return true if err is about corrupted mount point @@ -61,122 +50,6 @@ func IsCorruptedMnt(err error) bool { return underlyingError == syscall.ENOTCONN || underlyingError == syscall.ESTALE || underlyingError == syscall.EIO || underlyingError == syscall.EACCES || underlyingError == syscall.EHOSTDOWN } -// MountInfo represents a single line in /proc/<pid>/mountinfo. 
-type MountInfo struct { // nolint: golint - // Unique ID for the mount (maybe reused after umount). - ID int - // The ID of the parent mount (or of self for the root of this mount namespace's mount tree). - ParentID int - // Major indicates one half of the device ID which identifies the device class - // (parsed from `st_dev` for files on this filesystem). - Major int - // Minor indicates one half of the device ID which identifies a specific - // instance of device (parsed from `st_dev` for files on this filesystem). - Minor int - // The pathname of the directory in the filesystem which forms the root of this mount. - Root string - // Mount source, filesystem-specific information. e.g. device, tmpfs name. - Source string - // Mount point, the pathname of the mount point. - MountPoint string - // Optional fieds, zero or more fields of the form "tag[:value]". - OptionalFields []string - // The filesystem type in the form "type[.subtype]". - FsType string - // Per-mount options. - MountOptions []string - // Per-superblock options. - SuperOptions []string -} - -// ParseMountInfo parses /proc/xxx/mountinfo. -func ParseMountInfo(filename string) ([]MountInfo, error) { - content, err := utilio.ConsistentRead(filename, maxListTries) - if err != nil { - return []MountInfo{}, err - } - contentStr := string(content) - infos := []MountInfo{} - - for _, line := range strings.Split(contentStr, "\n") { - if line == "" { - // the last split() item is empty string following the last \n - continue - } - // See `man proc` for authoritative description of format of the file. 
- fields := strings.Fields(line) - if len(fields) < expectedAtLeastNumFieldsPerMountInfo { - return nil, fmt.Errorf("wrong number of fields in (expected at least %d, got %d): %s", expectedAtLeastNumFieldsPerMountInfo, len(fields), line) - } - id, err := strconv.Atoi(fields[0]) - if err != nil { - return nil, err - } - parentID, err := strconv.Atoi(fields[1]) - if err != nil { - return nil, err - } - mm := strings.Split(fields[2], ":") - if len(mm) != 2 { - return nil, fmt.Errorf("parsing '%s' failed: unexpected minor:major pair %s", line, mm) - } - major, err := strconv.Atoi(mm[0]) - if err != nil { - return nil, fmt.Errorf("parsing '%s' failed: unable to parse major device id, err:%v", mm[0], err) - } - minor, err := strconv.Atoi(mm[1]) - if err != nil { - return nil, fmt.Errorf("parsing '%s' failed: unable to parse minor device id, err:%v", mm[1], err) - } - - info := MountInfo{ - ID: id, - ParentID: parentID, - Major: major, - Minor: minor, - Root: fields[3], - MountPoint: fields[4], - MountOptions: splitMountOptions(fields[5]), - } - // All fields until "-" are "optional fields". - i := 6 - for ; i < len(fields) && fields[i] != "-"; i++ { - info.OptionalFields = append(info.OptionalFields, fields[i]) - } - // Parse the rest 3 fields. - i++ - if len(fields)-i < 3 { - return nil, fmt.Errorf("expect 3 fields in %s, got %d", line, len(fields)-i) - } - info.FsType = fields[i] - info.Source = fields[i+1] - info.SuperOptions = splitMountOptions(fields[i+2]) - infos = append(infos, info) - } - return infos, nil -} - -// splitMountOptions parses comma-separated list of mount options into an array. -// It respects double quotes - commas in them are not considered as the option separator. -func splitMountOptions(s string) []string { - inQuotes := false - list := strings.FieldsFunc(s, func(r rune) bool { - if r == '"' { - inQuotes = !inQuotes - } - // Report a new field only when outside of double quotes. 
- return r == ',' && !inQuotes - }) - return list -} - -// isMountPointMatch returns true if the path in mp is the same as dir. -// Handles case where mountpoint dir has been renamed due to stale NFS mount. -func isMountPointMatch(mp MountPoint, dir string) bool { - deletedDir := fmt.Sprintf("%s\\040(deleted)", dir) - return ((mp.Path == dir) || (mp.Path == deletedDir)) -} - // PathExists returns true if the specified path exists. // TODO: clean this up to use pkg/util/file/FileExists func PathExists(path string) (bool, error) {