Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kubelet: fix sandbox garbage collection #43053

Merged
merged 2 commits into from
May 3, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
106 changes: 69 additions & 37 deletions pkg/kubelet/kuberuntime/kuberuntime_gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,6 @@ import (
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)

// sandboxMinGCAge is the minimum age for an empty sandbox before it is garbage collected.
// This is introduced to avoid a sandbox being garbage collected before its containers are
// created.
// Notice that if the first container of a sandbox is created too late (exceeds sandboxMinGCAge),
// the sandbox could still be garbaged collected. In that case, SyncPod will recreate the
// sandbox and make sure old containers are all stopped.
// In the following figure, 'o' is a stopped sandbox, 'x' is a removed sandbox. It shows
// that, approximately if a sandbox keeps crashing and MinAge = 1/n GC Period, there will
// be 1/n more sandboxes not garbage collected.
// oooooo|xxxxxx|xxxxxx| <--- MinAge = 0
// gc gc gc gc
// oooooo|oooxxx|xxxxxx| <--- MinAge = 1/2 GC Perod
const sandboxMinGCAge time.Duration = 30 * time.Second

// containerGC is the manager of garbage collection.
type containerGC struct {
client internalapi.RuntimeService
Expand Down Expand Up @@ -72,6 +58,16 @@ type containerGCInfo struct {
createTime time.Time
}

// sandboxGCInfo is the internal information kept for sandboxes being considered for GC.
type sandboxGCInfo struct {
// The ID of the sandbox.
id string
// Creation time for the sandbox.
createTime time.Time
// If true, the sandbox is ready or still has containers.
active bool
}

// evictUnit is considered for eviction as units of (UID, container name) pair.
type evictUnit struct {
// UID of the pod.
Expand All @@ -81,6 +77,7 @@ type evictUnit struct {
}

type containersByEvictUnit map[evictUnit][]containerGCInfo
type sandboxesByPodUID map[types.UID][]sandboxGCInfo

// NumContainers returns the number of containers in this map.
func (cu containersByEvictUnit) NumContainers() int {
Expand All @@ -103,6 +100,13 @@ func (a byCreated) Len() int { return len(a) }
func (a byCreated) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a byCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) }

// Newest first.
type sandboxByCreated []sandboxGCInfo

func (a sandboxByCreated) Len() int { return len(a) }
func (a sandboxByCreated) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a sandboxByCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) }

// enforceMaxContainersPerEvictUnit enforces MaxPerPodContainer for each evictUnit.
func (cgc *containerGC) enforceMaxContainersPerEvictUnit(evictUnits containersByEvictUnit, MaxContainers int) {
for key := range evictUnits {
Expand All @@ -118,7 +122,7 @@ func (cgc *containerGC) enforceMaxContainersPerEvictUnit(evictUnits containersBy
func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int) []containerGCInfo {
// Remove from oldest to newest (last to first).
numToKeep := len(containers) - toRemove
for i := numToKeep; i < len(containers); i++ {
for i := len(containers) - 1; i >= numToKeep; i-- {
if err := cgc.manager.removeContainer(containers[i].id); err != nil {
glog.Errorf("Failed to remove container %q: %v", containers[i].id, err)
}
Expand All @@ -128,6 +132,18 @@ func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int
return containers[:numToKeep]
}

// removeOldestNSandboxes removes the oldest inactive toRemove sandboxes and
// returns the resulting slice.
func (cgc *containerGC) removeOldestNSandboxes(sandboxes []sandboxGCInfo, toRemove int) {
// Remove from oldest to newest (last to first).
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this oldest to newest? Seems to be newest to oldest...

And also the same question for the container remove function.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ACK. Would fix this.

numToKeep := len(sandboxes) - toRemove
for i := len(sandboxes) - 1; i >= numToKeep; i-- {
if !sandboxes[i].active {
cgc.removeSandbox(sandboxes[i].id)
}
}
}

// removeSandbox removes the sandbox by sandboxID.
func (cgc *containerGC) removeSandbox(sandboxID string) {
glog.V(4).Infof("Removing sandbox %q", sandboxID)
Expand Down Expand Up @@ -239,9 +255,13 @@ func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy
return nil
}

// evictSandboxes evicts all sandboxes that are evictable. Evictable sandboxes are: not running
// and contains no containers at all.
func (cgc *containerGC) evictSandboxes(minAge time.Duration) error {
// evictSandboxes remove all evictable sandboxes. An evictable sandbox must
// meet the following requirements:
// 1. not in ready state
// 2. contains no containers.
// 3. belong to a non-existent (i.e., already removed) pod, or is not the
// most recently created sandbox for the pod.
func (cgc *containerGC) evictSandboxes() error {
containers, err := cgc.manager.getKubeletContainers(true)
if err != nil {
return err
Expand All @@ -252,38 +272,50 @@ func (cgc *containerGC) evictSandboxes(minAge time.Duration) error {
return err
}

evictSandboxes := make([]string, 0)
newestGCTime := time.Now().Add(-minAge)
sandboxesByPod := make(sandboxesByPodUID)
for _, sandbox := range sandboxes {
// Prune out ready sandboxes.
podUID := types.UID(sandbox.Metadata.Uid)
sandboxInfo := sandboxGCInfo{
id: sandbox.Id,
createTime: time.Unix(0, sandbox.CreatedAt),
}

// Set ready sandboxes to be active.
if sandbox.State == runtimeapi.PodSandboxState_SANDBOX_READY {
continue
sandboxInfo.active = true
}

// Prune out sandboxes that still have containers.
found := false
// Set sandboxes that still have containers to be active.
hasContainers := false
sandboxID := sandbox.Id
for _, container := range containers {
if container.PodSandboxId == sandboxID {
found = true
hasContainers = true
break
}
}
if found {
continue
if hasContainers {
sandboxInfo.active = true
}

// Only garbage collect sandboxes older than sandboxMinGCAge.
createdAt := time.Unix(0, sandbox.CreatedAt)
if createdAt.After(newestGCTime) {
continue
}
glog.V(4).Infof("PodSandbox %q is eligible for garbage collection since it was created before %v: %+v", sandboxID, newestGCTime, sandbox)
evictSandboxes = append(evictSandboxes, sandboxID)
sandboxesByPod[podUID] = append(sandboxesByPod[podUID], sandboxInfo)
}

for _, sandbox := range evictSandboxes {
cgc.removeSandbox(sandbox)
// Sort the sandboxes by age.
for uid := range sandboxesByPod {
sort.Sort(sandboxByCreated(sandboxesByPod[uid]))
}

for podUID, sandboxes := range sandboxesByPod {
if cgc.isPodDeleted(podUID) {
// Remove all evictable sandboxes if the pod has been removed.
// Note that the latest dead sandbox is also removed if there is
// already an active one.
cgc.removeOldestNSandboxes(sandboxes, len(sandboxes))
} else {
// Keep latest one if the pod still exists.
cgc.removeOldestNSandboxes(sandboxes, len(sandboxes)-1)
}
}
return nil
}
Expand Down Expand Up @@ -342,7 +374,7 @@ func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy,
}

// Remove sandboxes with zero containers
if err := cgc.evictSandboxes(sandboxMinGCAge); err != nil {
if err := cgc.evictSandboxes(); err != nil {
return err
}

Expand Down
59 changes: 39 additions & 20 deletions pkg/kubelet/kuberuntime/kuberuntime_gc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@ func TestSandboxGC(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)

fakePodGetter := m.containerGC.podGetter.(*fakePodGetter)
makeGCSandbox := func(pod *v1.Pod, attempt uint32, state runtimeapi.PodSandboxState, withPodGetter bool) sandboxTemplate {
if withPodGetter {
// initialize the pod getter
fakePodGetter.pods[pod.UID] = pod
}
return sandboxTemplate{
pod: pod,
state: state,
attempt: attempt,
}
}

pods := []*v1.Pod{
makeTestPod("foo1", "new", "1234", []v1.Container{
makeTestContainer("bar1", "busybox"),
Expand All @@ -42,6 +55,9 @@ func TestSandboxGC(t *testing.T) {
makeTestPod("foo2", "new", "5678", []v1.Container{
makeTestContainer("bar3", "busybox"),
}),
makeTestPod("deleted", "new", "9012", []v1.Container{
makeTestContainer("bar4", "busybox"),
}),
}

for c, test := range []struct {
Expand All @@ -52,55 +68,58 @@ func TestSandboxGC(t *testing.T) {
remain []int // template indexes of remaining sandboxes
}{
{
description: "sandbox with no containers should be garbage collected.",
description: "notready sandboxes without containers for deleted pods should be garbage collected.",
sandboxes: []sandboxTemplate{
{pod: pods[0], state: runtimeapi.PodSandboxState_SANDBOX_NOTREADY},
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false),
},
containers: []containerTemplate{},
remain: []int{},
},
{
description: "running sandbox should not be garbage collected.",
description: "ready sandboxes without containers for deleted pods should not be garbage collected.",
sandboxes: []sandboxTemplate{
{pod: pods[0], state: runtimeapi.PodSandboxState_SANDBOX_READY},
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_READY, false),
},
containers: []containerTemplate{},
remain: []int{0},
},
{
description: "sandboxes for existing pods should not be garbage collected.",
sandboxes: []sandboxTemplate{
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_READY, true),
makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true),
},
containers: []containerTemplate{},
remain: []int{0, 1},
},
{
description: "sandbox with containers should not be garbage collected.",
sandboxes: []sandboxTemplate{
{pod: pods[0], state: runtimeapi.PodSandboxState_SANDBOX_NOTREADY},
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false),
},
containers: []containerTemplate{
{pod: pods[0], container: &pods[0].Spec.Containers[0], state: runtimeapi.ContainerState_CONTAINER_EXITED},
},
remain: []int{0},
},
{
description: "sandbox within min age should not be garbage collected.",
sandboxes: []sandboxTemplate{
{pod: pods[0], createdAt: time.Now().UnixNano(), state: runtimeapi.PodSandboxState_SANDBOX_NOTREADY},
{pod: pods[1], createdAt: time.Now().Add(-2 * time.Hour).UnixNano(), state: runtimeapi.PodSandboxState_SANDBOX_NOTREADY},
},
containers: []containerTemplate{},
minAge: time.Hour, // assume the test won't take an hour
remain: []int{0},
},
{
description: "multiple sandboxes should be handled properly.",
sandboxes: []sandboxTemplate{
// running sandbox.
{pod: pods[0], attempt: 1, state: runtimeapi.PodSandboxState_SANDBOX_READY},
makeGCSandbox(pods[0], 1, runtimeapi.PodSandboxState_SANDBOX_READY, true),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should have another exit pods[0] to make sure we don't keep latest exit if there is a running one.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ACK.

// exited sandbox without containers.
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false),
// exited sandbox with containers.
{pod: pods[1], attempt: 1, state: runtimeapi.PodSandboxState_SANDBOX_NOTREADY},
makeGCSandbox(pods[1], 1, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true),
// exited sandbox without containers.
{pod: pods[1], attempt: 0, state: runtimeapi.PodSandboxState_SANDBOX_NOTREADY},
makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false),
// exited sandbox without containers for deleted pods.
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false),
},
containers: []containerTemplate{
{pod: pods[1], container: &pods[1].Spec.Containers[0], sandboxAttempt: 1, state: runtimeapi.ContainerState_CONTAINER_EXITED},
},
remain: []int{0, 1},
remain: []int{0, 2},
},
} {
t.Logf("TestCase #%d: %+v", c, test)
Expand All @@ -109,7 +128,7 @@ func TestSandboxGC(t *testing.T) {
fakeRuntime.SetFakeSandboxes(fakeSandboxes)
fakeRuntime.SetFakeContainers(fakeContainers)

err := m.containerGC.evictSandboxes(test.minAge)
err := m.containerGC.evictSandboxes()
assert.NoError(t, err)
realRemain, err := fakeRuntime.ListPodSandbox(nil)
assert.NoError(t, err)
Expand Down