Skip to content

Commit

Permalink
kubelet: fix create create sandbox delete pod race
Browse files Browse the repository at this point in the history
  • Loading branch information
rphillips committed Feb 18, 2021
1 parent 9fb1aa9 commit f989ada
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 1 deletion.
4 changes: 3 additions & 1 deletion pkg/kubelet/kuberuntime/fake_kuberuntime_manager.go
Expand Up @@ -101,7 +101,9 @@ func newFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageS
return nil, err
}

kubeRuntimeManager.containerGC = newContainerGC(runtimeService, newFakePodStateProvider(), kubeRuntimeManager)
podStateProvider := newFakePodStateProvider()
kubeRuntimeManager.containerGC = newContainerGC(runtimeService, podStateProvider, kubeRuntimeManager)
kubeRuntimeManager.podStateProvider = podStateProvider
kubeRuntimeManager.runtimeName = typedVersion.RuntimeName
kubeRuntimeManager.imagePuller = images.NewImageManager(
kubecontainer.FilterEventRecorder(recorder),
Expand Down
12 changes: 12 additions & 0 deletions pkg/kubelet/kuberuntime/kuberuntime_manager.go
Expand Up @@ -137,6 +137,9 @@ type kubeGenericRuntimeManager struct {

// Cache last per-container error message to reduce log spam
logReduction *logreduction.LogReduction

// PodState provider instance
podStateProvider podStateProvider
}

// KubeGenericRuntime is a interface contains interfaces for container runtime and command.
Expand Down Expand Up @@ -248,6 +251,7 @@ func NewKubeGenericRuntimeManager(
imagePullBurst)
kubeRuntimeManager.runner = lifecycle.NewHandlerRunner(httpClient, kubeRuntimeManager, kubeRuntimeManager)
kubeRuntimeManager.containerGC = newContainerGC(runtimeService, podStateProvider, kubeRuntimeManager)
kubeRuntimeManager.podStateProvider = podStateProvider

kubeRuntimeManager.versionCache = cache.NewObjectCache(
func() (interface{}, error) {
Expand Down Expand Up @@ -751,6 +755,14 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
result.AddSyncResult(createSandboxResult)
podSandboxID, msg, err = m.createPodSandbox(pod, podContainerChanges.Attempt)
if err != nil {
// createPodSandbox can return an error from CNI, CSI,
// or CRI if the Pod has been deleted while the POD is
// being created. If the pod has been deleted then it's
// not a real error.
if m.podStateProvider.IsPodDeleted(pod.UID) {
klog.V(4).Infof("Pod %q was deleted and sandbox failed to be created: %v", format.Pod(pod), pod.UID)
return
}
createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg)
klog.Errorf("createPodSandbox for pod %q failed: %v", format.Pod(pod), err)
ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
Expand Down
35 changes: 35 additions & 0 deletions pkg/kubelet/kuberuntime/kuberuntime_manager_test.go
Expand Up @@ -1372,6 +1372,41 @@ func TestComputePodActionsWithInitAndEphemeralContainers(t *testing.T) {
}
}

func TestSyncPodWithSandboxAndDeletedPod(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
fakeRuntime.ErrorOnSandboxCreate = true

containers := []v1.Container{
{
Name: "foo1",
Image: "busybox",
ImagePullPolicy: v1.PullIfNotPresent,
},
}
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "foo",
Namespace: "new",
},
Spec: v1.PodSpec{
Containers: containers,
},
}

backOff := flowcontrol.NewBackOff(time.Second, time.Minute)

// GetPodStatus and the following SyncPod will not return errors in the
// case where the pod has been deleted. We are not adding any pods into
// the fakePodProvider so they are 'deleted'.
podStatus, err := m.GetPodStatus(pod.UID, pod.Name, pod.Namespace)
assert.NoError(t, err)
result := m.SyncPod(pod, podStatus, []v1.Secret{}, backOff)
// This will return an error if the pod has _not_ been deleted.
assert.NoError(t, result.Error())
}

func makeBasePodAndStatusWithInitAndEphemeralContainers() (*v1.Pod, *kubecontainer.PodStatus) {
pod, status := makeBasePodAndStatus()
pod.Spec.InitContainers = []v1.Container{
Expand Down
Expand Up @@ -67,6 +67,8 @@ type FakeRuntimeService struct {
Containers map[string]*FakeContainer
Sandboxes map[string]*FakePodSandbox
FakeContainerStats map[string]*runtimeapi.ContainerStats

ErrorOnSandboxCreate bool
}

// GetContainerID returns the unique container ID from the FakeRuntimeService.
Expand Down Expand Up @@ -198,6 +200,10 @@ func (r *FakeRuntimeService) RunPodSandbox(config *runtimeapi.PodSandboxConfig,
return "", err
}

if r.ErrorOnSandboxCreate {
return "", fmt.Errorf("error on sandbox create")
}

// PodSandboxID should be randomized for real container runtime, but here just use
// fixed name from BuildSandboxName() for easily making fake sandboxes.
podSandboxID := BuildSandboxName(config.Metadata)
Expand Down

0 comments on commit f989ada

Please sign in to comment.