Skip to content

Commit

Permalink
Merge pull request kubernetes#123593 from giuseppe/userns-use-kubelet…
Browse files Browse the repository at this point in the history
…-user-mappings

KEP-127: kubelet: honor kubelet user mappings
  • Loading branch information
k8s-ci-robot committed Mar 4, 2024
2 parents d34fbeb + 6174f19 commit 89cbd94
Show file tree
Hide file tree
Showing 7 changed files with 460 additions and 83 deletions.
3 changes: 2 additions & 1 deletion pkg/features/kube_features.go
Original file line number Diff line number Diff line change
Expand Up @@ -823,6 +823,7 @@ const (
// owner: @rata, @giuseppe
// kep: https://kep.k8s.io/127
// alpha: v1.25
// beta: v1.30
//
// Enables user namespace support for stateless pods.
UserNamespacesSupport featuregate.Feature = "UserNamespacesSupport"
Expand Down Expand Up @@ -1154,7 +1155,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS

VolumeCapacityPriority: {Default: false, PreRelease: featuregate.Alpha},

UserNamespacesSupport: {Default: false, PreRelease: featuregate.Alpha},
UserNamespacesSupport: {Default: false, PreRelease: featuregate.Beta},

WinDSR: {Default: false, PreRelease: featuregate.Alpha},

Expand Down
9 changes: 9 additions & 0 deletions pkg/kubelet/kubelet_getters.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,15 @@ func (kl *Kubelet) HandlerSupportsUserNamespaces(rtHandler string) (bool, error)
return h.SupportsUserNamespaces, nil
}

// GetKubeletMappings gets the additional IDs allocated for the Kubelet.
// It is the exported form of getKubeletMappings and returns that method's
// results unchanged: the first ID of the range, the number of IDs in the
// range, and any error encountered while determining them.
func (kl *Kubelet) GetKubeletMappings() (uint32, uint32, error) {
return kl.getKubeletMappings()
}

// GetMaxPods returns the value of the Kubelet's maxPods field.
func (kl *Kubelet) GetMaxPods() int {
return kl.maxPods
}

// getPodDir returns the full path to the per-pod directory for the pod with
// the given UID.
func (kl *Kubelet) getPodDir(podUID types.UID) string {
Expand Down
86 changes: 86 additions & 0 deletions pkg/kubelet/kubelet_pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,18 @@ package kubelet
import (
"bytes"
"context"
goerrors "errors"
"fmt"
"io"
"net/http"
"net/url"
"os"
"os/exec"
"os/user"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"

"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -76,8 +80,90 @@ const (
const (
PodInitializing = "PodInitializing"
ContainerCreating = "ContainerCreating"

// kubeletUser is the name of the system user that getKubeletMappings looks up
// and passes to the getsubids tool to discover subordinate ID ranges.
kubeletUser = "kubelet"
)

// parseGetSubIdsOutput parses the output from the `getsubids` tool, which is used to query
// subordinate user or group ID ranges for a given user or group. getsubids produces a line
// for each mapping configured.
// Here we expect that there is a single mapping, and the same values are used for the
// subordinate user and group ID ranges.
// The output is something like:
//
//	$ getsubids kubelet
//	0: kubelet 65536 2147483648
//	$ getsubids -g kubelet
//	0: kubelet 65536 2147483648
//
// The first return value is the first ID of the range and the second is the number of IDs
// in the range. An error is returned when the input is not exactly one line made of four
// whitespace-separated fields, when either number is not a valid base-10 uint32, or when
// the range would extend past the end of the 32-bit ID space.
func parseGetSubIdsOutput(input string) (uint32, uint32, error) {
	lines := strings.Split(strings.Trim(input, "\n"), "\n")
	if len(lines) != 1 {
		return 0, 0, fmt.Errorf("error parsing line %q: it must contain only one line", input)
	}

	parts := strings.Fields(lines[0])
	if len(parts) != 4 {
		return 0, 0, fmt.Errorf("invalid line %q", input)
	}

	// Fields 0 and 1 are the mapping index and the owner name; only the numbers are used.
	firstID, err := strconv.ParseUint(parts[2], 10, 32)
	if err != nil {
		return 0, 0, fmt.Errorf("error parsing line %q: %w", input, err)
	}

	length, err := strconv.ParseUint(parts[3], 10, 32)
	if err != nil {
		return 0, 0, fmt.Errorf("error parsing line %q: %w", input, err)
	}

	// Each value fits in 32 bits, so the uint64 sum cannot wrap. A range that ends
	// beyond 2^32 cannot be represented with 32-bit IDs and would wrap around when
	// individual IDs are later computed from it, so reject it here.
	const idSpaceSize = 1 << 32
	if firstID+length > idSpaceSize {
		return 0, 0, fmt.Errorf("error parsing line %q: range exceeds the 32-bit ID space", input)
	}

	return uint32(firstID), uint32(length), nil
}

// getKubeletMappings returns the range of IDs that can be used to configure user namespaces,
// as the first ID of the range followed by the number of IDs in it.
// If subordinate user or group ID ranges are specified for the kubelet user and the getsubids tool
// is installed, then the single mapping specified both for user and group IDs will be used.
// If the tool is not installed, or the kubelet user does not exist, the default mapping is returned.
// The default mapping is also returned when the UserNamespacesSupport feature gate is disabled.
// The default mapping includes the entire IDs range except IDs below 65536.
// An error is returned when the user lookup or executable lookup fails for any other reason,
// when running getsubids fails, when the subordinate user and group ranges differ, or when
// the getsubids output cannot be parsed.
func (kl *Kubelet) getKubeletMappings() (uint32, uint32, error) {
	// default mappings to return if there is no specific configuration
	const defaultFirstID = 1 << 16
	const defaultLen = 1<<32 - defaultFirstID

	if !utilfeature.DefaultFeatureGate.Enabled(features.UserNamespacesSupport) {
		return defaultFirstID, defaultLen, nil
	}

	if _, err := user.Lookup(kubeletUser); err != nil {
		var unknownUserErr user.UnknownUserError
		if goerrors.As(err, &unknownUserErr) {
			// if the user is not found, we assume that the user is not configured
			return defaultFirstID, defaultLen, nil
		}
		return 0, 0, err
	}

	execName := "getsubids"
	cmd, err := exec.LookPath(execName)
	if err != nil {
		// LookPath reports a missing executable as *exec.Error wrapping exec.ErrNotFound.
		// os.IsNotExist does not unwrap that type, so errors.Is is needed to detect it;
		// the os.IsNotExist check is kept for errors coming directly from the os package.
		if goerrors.Is(err, exec.ErrNotFound) || os.IsNotExist(err) {
			klog.V(2).InfoS("Could not find executable, default mappings will be used for the user namespaces", "executable", execName, "err", err)
			return defaultFirstID, defaultLen, nil
		}
		return 0, 0, err
	}

	outUids, err := exec.Command(cmd, kubeletUser).Output()
	if err != nil {
		return 0, 0, fmt.Errorf("error retrieving additional ids for user %q: %w", kubeletUser, err)
	}
	outGids, err := exec.Command(cmd, "-g", kubeletUser).Output()
	if err != nil {
		return 0, 0, fmt.Errorf("error retrieving additional gids for user %q: %w", kubeletUser, err)
	}
	// A single range is returned for both kinds of ID, so the two outputs must be identical.
	if string(outUids) != string(outGids) {
		return 0, 0, fmt.Errorf("mismatched subuids and subgids for user %q", kubeletUser)
	}
	return parseGetSubIdsOutput(string(outUids))
}

// Get a list of pods that have data directories.
func (kl *Kubelet) listPodsFromDisk() ([]types.UID, error) {
podInfos, err := os.ReadDir(kl.getPodsDir())
Expand Down
74 changes: 74 additions & 0 deletions pkg/kubelet/kubelet_pods_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6013,3 +6013,77 @@ func TestGetNonExistentImagePullSecret(t *testing.T) {
event := <-fakeRecorder.Events
assert.Equal(t, event, expectedEvent)
}

// TestParseGetSubIdsOutput feeds parseGetSubIdsOutput a table of well-formed and
// malformed getsubids outputs and checks either the parsed range or that an
// error was reported.
func TestParseGetSubIdsOutput(t *testing.T) {
	type subIDsCase struct {
		name         string
		input        string
		wantFirstID  uint32
		wantRangeLen uint32
		wantErr      bool
	}

	cases := []subIDsCase{
		{name: "valid", input: "0: kubelet 65536 2147483648", wantFirstID: 65536, wantRangeLen: 2147483648},
		{name: "multiple lines", input: "0: kubelet 1 2\n1: kubelet 3 4\n", wantErr: true},
		{name: "wrong format", input: "0: kubelet 65536", wantErr: true},
		{name: "non numeric 1", input: "0: kubelet Foo 65536", wantErr: true},
		{name: "non numeric 2", input: "0: kubelet 0 Bar", wantErr: true},
		{name: "overflow 1", input: "0: kubelet 4294967296 2147483648", wantErr: true},
		{name: "overflow 2", input: "0: kubelet 65536 4294967296", wantErr: true},
		{name: "negative value 1", input: "0: kubelet -1 2147483648", wantErr: true},
		{name: "negative value 2", input: "0: kubelet 65536 -1", wantErr: true},
	}

	for i := range cases {
		c := cases[i]
		t.Run(c.name, func(t *testing.T) {
			first, length, err := parseGetSubIdsOutput(c.input)

			// Failure cases only need an error; the returned numbers are not inspected.
			if c.wantErr {
				if err == nil {
					t.Errorf("%s: expected error, got nil", c.name)
				}
				return
			}

			if err != nil {
				t.Errorf("%s: unexpected error: %v", c.name, err)
			}
			if !(first == c.wantFirstID && length == c.wantRangeLen) {
				t.Errorf("%s: got (%d, %d), want (%d, %d)", c.name, first, length, c.wantFirstID, c.wantRangeLen)
			}
		})
	}
}
85 changes: 44 additions & 41 deletions pkg/kubelet/userns/userns_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package userns
import (
"encoding/json"
"fmt"
"math"
"os"
"path/filepath"
"sync"
Expand All @@ -40,10 +39,6 @@ import (
// length for the user namespace to create (65536).
const userNsLength = (1 << 16)

// Limit the total number of pods using userns in this node to this value.
// This is an alpha limitation that will probably be lifted later.
const maxPods = 1024

// Create a new map when we removed enough pods to avoid memory leaks
// since Go maps never free memory.
const mapReInitializeThreshold = 1000
Expand All @@ -52,14 +47,19 @@ type userNsPodsManager interface {
HandlerSupportsUserNamespaces(runtimeHandler string) (bool, error)
GetPodDir(podUID types.UID) string
ListPodsFromDisk() ([]types.UID, error)
GetKubeletMappings() (uint32, uint32, error)
GetMaxPods() int
}

type UsernsManager struct {
used *allocator.AllocationBitmap
usedBy map[types.UID]uint32 // Map pod.UID to range used
removed int
numAllocated int
kl userNsPodsManager
used *allocator.AllocationBitmap
usedBy map[types.UID]uint32 // Map pod.UID to range used
removed int

off int
len int

kl userNsPodsManager
// This protects all members except for kl.
lock sync.Mutex
}
Expand Down Expand Up @@ -130,16 +130,33 @@ func (m *UsernsManager) readMappingsFromFile(pod types.UID) ([]byte, error) {
}

func MakeUserNsManager(kl userNsPodsManager) (*UsernsManager, error) {
kubeletMappingID, kubeletMappingLen, err := kl.GetKubeletMappings()
if err != nil {
return nil, err
}

if kubeletMappingID%userNsLength != 0 {
return nil, fmt.Errorf("kubelet user assigned ID %v is not a multiple of %v", kubeletMappingID, userNsLength)
}
if kubeletMappingID < userNsLength {
// We don't allow mapping ID 0, as that would circumvent security.
return nil, fmt.Errorf("kubelet user assigned ID %v must be greater or equal to %v", kubeletMappingID, userNsLength)
}
if kubeletMappingLen%userNsLength != 0 {
return nil, fmt.Errorf("kubelet user assigned IDs length %v is not a multiple of %v", kubeletMappingLen, userNsLength)
}
if kubeletMappingLen/userNsLength < uint32(kl.GetMaxPods()) {
return nil, fmt.Errorf("kubelet user assigned IDs are not enough to support %v pods", kl.GetMaxPods())
}
off := int(kubeletMappingID / userNsLength)
len := int(kubeletMappingLen / userNsLength)

m := UsernsManager{
// Create a bitArray for all the UID space (2^32).
// As a by product of that, no index param to bitArray can be out of bounds (index is uint32).
used: allocator.NewAllocationMap((math.MaxUint32+1)/userNsLength, "user namespaces"),
used: allocator.NewAllocationMap(len, "user namespaces"),
usedBy: make(map[types.UID]uint32),
kl: kl,
}
// First block is reserved for the host.
if _, err := m.used.Allocate(0); err != nil {
return nil, err
off: off,
len: len,
}

// do not bother reading the list of pods if user namespaces are not enabled.
Expand Down Expand Up @@ -184,24 +201,17 @@ func (m *UsernsManager) recordPodMappings(pod types.UID) error {

// isSet checks if the specified index is already set.
func (m *UsernsManager) isSet(v uint32) bool {
index := int(v / userNsLength)
index := int(v/userNsLength) - m.off
if index < 0 || index >= m.len {
return true
}
return m.used.Has(index)
}

// allocateOne finds a free user namespace and allocate it to the specified pod.
// The first return value is the first ID in the user namespace, the second returns
// the length for the user namespace range.
func (m *UsernsManager) allocateOne(pod types.UID) (firstID uint32, length uint32, err error) {
if m.numAllocated >= maxPods {
return 0, 0, fmt.Errorf("limit on count of pods with user namespaces exceeded (limit is %v, current pods with userns: %v)", maxPods, m.numAllocated)
}
m.numAllocated++
defer func() {
if err != nil {
m.numAllocated--
}
}()

firstZero, found, err := m.used.AllocateNext()
if err != nil {
return 0, 0, err
Expand All @@ -212,7 +222,7 @@ func (m *UsernsManager) allocateOne(pod types.UID) (firstID uint32, length uint3

klog.V(5).InfoS("new pod user namespace allocation", "podUID", pod)

firstID = uint32(firstZero * userNsLength)
firstID = uint32((firstZero + m.off) * userNsLength)
m.usedBy[pod] = firstID
return firstID, userNsLength, nil
}
Expand All @@ -229,7 +239,10 @@ func (m *UsernsManager) record(pod types.UID, from, length uint32) (err error) {
if found && prevFrom != from {
return fmt.Errorf("different user namespace range already used by pod %q", pod)
}
index := int(from / userNsLength)
index := int(from/userNsLength) - m.off
if index < 0 || index >= m.len {
return fmt.Errorf("id %v is out of range", from)
}
// if the pod wasn't found then verify the range is free.
if !found && m.used.Has(index) {
return fmt.Errorf("range picked for pod %q already taken", pod)
Expand All @@ -238,15 +251,6 @@ func (m *UsernsManager) record(pod types.UID, from, length uint32) (err error) {
if found && prevFrom == from {
return nil
}
if m.numAllocated >= maxPods {
return fmt.Errorf("limit on count of pods with user namespaces exceeded (limit is %v, current pods with userns: %v)", maxPods, m.numAllocated)
}
m.numAllocated++
defer func() {
if err != nil {
m.numAllocated--
}
}()

klog.V(5).InfoS("new pod user namespace allocation", "podUID", pod)

Expand Down Expand Up @@ -291,7 +295,6 @@ func (m *UsernsManager) releaseWithLock(pod types.UID) {
delete(m.usedBy, pod)

klog.V(5).InfoS("releasing pod user namespace allocation", "podUID", pod)
m.numAllocated--
m.removed++

_ = os.Remove(filepath.Join(m.kl.GetPodDir(pod), mappingsFile))
Expand All @@ -304,7 +307,7 @@ func (m *UsernsManager) releaseWithLock(pod types.UID) {
m.usedBy = n
m.removed = 0
}
m.used.Release(int(v / userNsLength))
_ = m.used.Release(int(v/userNsLength) - m.off)
}

func (m *UsernsManager) parseUserNsFileAndRecord(pod types.UID, content []byte) (userNs userNamespace, err error) {
Expand Down

0 comments on commit 89cbd94

Please sign in to comment.