virtcontainers: Return the appropriate container status
When our runtime is asked for the container status, we also handle the
case where the container needs to be stopped because the shim process
for that container on the host has terminated.
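
The check itself boils down to asking the host whether the shim's PID still
exists. As a rough illustration only (the helper name and the signal-0 probe
below are assumptions for this sketch, not the runtime's actual API), such a
liveness check can look like this:

    // Illustrative sketch: probe whether the per-container shim process is
    // still alive on the host by sending it signal 0, which performs the
    // existence/permission checks without delivering a signal.
    package main

    import (
        "fmt"
        "syscall"
    )

    // shimAlive is a hypothetical helper, not the runtime's real API.
    func shimAlive(pid int) bool {
        if pid <= 0 {
            return false
        }
        // ESRCH means the process is gone; nil (or EPERM) means it still exists.
        err := syscall.Kill(pid, syscall.Signal(0))
        return err == nil || err == syscall.EPERM
    }

    func main() {
        fmt.Println(shimAlive(1)) // PID 1 always exists: prints true
    }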

In the current implementation, we retrieve the container status before
stopping the container, causing a wrong status to be returned. The wait
for the stopping go-routine's completion is done in a defer within the
caller of statusContainer(), so statusContainer() still returns the
pre-stop status.
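
To make the ordering concrete, here is a self-contained toy sketch (an
assumed simplification with made-up names, mirroring the shape of the code
removed in the diff below) showing why the deferred wait cannot influence a
return value that has already been evaluated:

    // Toy reproduction of the defer-ordering problem; "container" and the
    // method names are illustrative, not the runtime's real types.
    package main

    import (
        "fmt"
        "sync"
    )

    type container struct {
        mu     sync.RWMutex
        wg     sync.WaitGroup
        status string
    }

    // statusWithDeferredStop mimics the old flow: the status is read under a
    // read lock while the transition to "stopped" happens in a goroutine that
    // needs the write lock. Defers run LIFO, so RUnlock runs first and
    // wg.Wait runs last -- but only after the return value is already fixed.
    func (c *container) statusWithDeferredStop() string {
        c.mu.RLock()
        defer c.wg.Wait()    // runs last, after the return value is evaluated
        defer c.mu.RUnlock() // runs first, letting the goroutine take the lock

        c.wg.Add(1)
        go func() {
            defer c.wg.Done()
            c.mu.Lock() // blocks until the read lock above is released
            c.status = "stopped"
            c.mu.Unlock()
        }()

        return c.status // still the pre-stop value
    }

    func main() {
        c := &container{status: "running"}
        fmt.Println(c.statusWithDeferredStop()) // prints "running"
        fmt.Println(c.status)                   // prints "stopped"
    }

The caller observes "running" even though the container ends up stopped by
the time the call returns, which matches the symptom described above.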

This bug was first observed when updating to docker v18.09/containerd
v1.2.0. With the current implementation, containerd-shim receives a
TaskExit event when it detects that kata-shim has terminated, but when it
then checks the container state it does not get the expected "stopped" value.

This commit resolves the described issue by simplifying the locking
around the status container calls. Originally, StatusContainer would
take a read lock. If we needed to update the container status in
statusContainer, we would start a go-routine that requested a
read-write lock and waited for the original read lock to be
released. Can't imagine a bug could linger in this logic. We now
simply take a read-write lock in the caller (StatusContainer),
removing the need for a separate go-routine and defer. This greatly
simplifies the logic and removes the original bug.
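
In the fixed shape, the same toy collapses to a single synchronous path
(again an assumed simplification, echoing the diff below): the caller holds
the exclusive lock and the stop happens inline, so the returned value
already reflects it:

    // Toy version of the fixed flow: one exclusive lock in the caller,
    // stop performed inline, status read afterwards. Names are illustrative.
    package main

    import (
        "fmt"
        "sync"
    )

    type container struct {
        mu     sync.RWMutex
        status string
    }

    func (c *container) statusWithSyncStop() string {
        c.mu.Lock()
        defer c.mu.Unlock()

        c.status = "stopped" // the stop completes before the status is read
        return c.status      // returns "stopped"
    }

    func main() {
        c := &container{status: "running"}
        fmt.Println(c.statusWithSyncStop()) // prints "stopped"
    }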

Fixes #926

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
Sebastien Boeuf committed Nov 29, 2018
1 parent e06c8aa commit fa9b15d
Showing 1 changed file with 19 additions and 37 deletions.
56 changes: 19 additions & 37 deletions virtcontainers/api.go
@@ -337,10 +337,11 @@ func StatusSandbox(ctx context.Context, sandboxID string) (SandboxStatus, error)
 		return SandboxStatus{}, errNeedSandboxID
 	}
 
-	lockFile, err := rLockSandbox(sandboxID)
+	lockFile, err := rwLockSandbox(sandboxID)
 	if err != nil {
 		return SandboxStatus{}, err
 	}
+	defer unlockSandbox(lockFile)
 
 	s, err := fetchSandbox(ctx, sandboxID)
 	if err != nil {
@@ -349,16 +350,6 @@ func StatusSandbox(ctx context.Context, sandboxID string) (SandboxStatus, error)
 	}
 	defer s.releaseStatelessSandbox()
 
-	// We need to potentially wait for a separate container.stop() routine
-	// that needs to be terminated before we return from this function.
-	// Deferring the synchronization here is very important since we want
-	// to avoid a deadlock. Indeed, the goroutine started by statusContainer
-	// will need to lock an exclusive lock, meaning that all other locks have
-	// to be released to let this happen. This call ensures this will be the
-	// last operation executed by this function.
-	defer s.wg.Wait()
-	defer unlockSandbox(lockFile)
-
 	var contStatusList []ContainerStatus
 	for _, container := range s.containers {
 		contStatus, err := statusContainer(s, container.id)
@@ -548,10 +539,11 @@ func StatusContainer(ctx context.Context, sandboxID, containerID string) (ContainerStatus, error)
 		return ContainerStatus{}, errNeedContainerID
 	}
 
-	lockFile, err := rLockSandbox(sandboxID)
+	lockFile, err := rwLockSandbox(sandboxID)
 	if err != nil {
 		return ContainerStatus{}, err
 	}
+	defer unlockSandbox(lockFile)
 
 	s, err := fetchSandbox(ctx, sandboxID)
 	if err != nil {
@@ -560,21 +552,21 @@ func StatusContainer(ctx context.Context, sandboxID, containerID string) (ContainerStatus, error)
 	}
 	defer s.releaseStatelessSandbox()
 
-	// We need to potentially wait for a separate container.stop() routine
-	// that needs to be terminated before we return from this function.
-	// Deferring the synchronization here is very important since we want
-	// to avoid a deadlock. Indeed, the goroutine started by statusContainer
-	// will need to lock an exclusive lock, meaning that all other locks have
-	// to be released to let this happen. This call ensures this will be the
-	// last operation executed by this function.
-	defer s.wg.Wait()
-	defer unlockSandbox(lockFile)
-
 	return statusContainer(s, containerID)
 }
 
-// This function is going to spawn a goroutine and it needs to be waited for
-// by the caller.
+// This function might have to stop the container if it realizes the shim
+// process is not running anymore. This might be caused by two different
+// reasons, either the process inside the VM exited and the shim terminated
+// accordingly, or the shim has been killed directly and we need to make sure
+// that we properly stop the container process inside the VM.
+//
+// When a container needs to be stopped because of those reasons, we want this
+// to happen atomically from a sandbox perspective. That's why we cannot afford
+// to take a read or read/write lock based on the situation, as it would break
+// the initial locking from the caller. Instead, a read/write lock has to be
+// taken from the caller, even if we simply return the container status without
+// taking any action regarding the container.
 func statusContainer(sandbox *Sandbox, containerID string) (ContainerStatus, error) {
 	for _, container := range sandbox.containers {
 		if container.id == containerID {
@@ -592,19 +584,9 @@ func statusContainer(sandbox *Sandbox, containerID string) (ContainerStatus, error)
 			}
 
 			if !running {
-				sandbox.wg.Add(1)
-				go func() {
-					defer sandbox.wg.Done()
-					lockFile, err := rwLockSandbox(sandbox.id)
-					if err != nil {
-						return
-					}
-					defer unlockSandbox(lockFile)
-
-					if err := container.stop(); err != nil {
-						return
-					}
-				}()
+				if err := container.stop(); err != nil {
+					return ContainerStatus{}, err
+				}
 			}
 		}
 