Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix race conditions in libcontainerd process handling #35809

Merged
merged 2 commits into from Dec 16, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
14 changes: 5 additions & 9 deletions daemon/kill.go
Expand Up @@ -4,10 +4,10 @@ import (
"context"
"fmt"
"runtime"
"strings"
"syscall"
"time"

"github.com/docker/docker/api/errdefs"
containerpkg "github.com/docker/docker/container"
"github.com/docker/docker/libcontainerd"
"github.com/docker/docker/pkg/signal"
Expand Down Expand Up @@ -97,15 +97,11 @@ func (daemon *Daemon) killWithSignal(container *containerpkg.Container, sig int)
}

if err := daemon.kill(container, sig); err != nil {
err = errors.Wrapf(err, "Cannot kill container %s", container.ID)
// if container or process not exists, ignore the error
// TODO: we shouldn't have to parse error strings from containerd
if strings.Contains(err.Error(), "container not found") ||
strings.Contains(err.Error(), "no such process") {
logrus.Warnf("container kill failed because of 'container not found' or 'no such process': %s", err.Error())
if errdefs.IsNotFound(err) {
unpause = false
logrus.WithError(err).WithField("container", container.ID).WithField("action", "kill").Debug("container kill failed because of 'container not found' or 'no such process'")
} else {
return err
return errors.Wrapf(err, "Cannot kill container %s", container.ID)
}
}

Expand Down Expand Up @@ -171,7 +167,7 @@ func (daemon *Daemon) Kill(container *containerpkg.Container) error {
// killPossibleDeadProcess is a wrapper around killSig() suppressing "no such process" error.
func (daemon *Daemon) killPossiblyDeadProcess(container *containerpkg.Container, sig int) error {
err := daemon.killWithSignal(container, sig)
if err == syscall.ESRCH {
if errdefs.IsNotFound(err) {
e := errNoSuchProcess{container.GetPID(), sig}
logrus.Debug(e)
return e
Expand Down
162 changes: 101 additions & 61 deletions libcontainerd/client_daemon.go
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/containerd/containerd/archive"
"github.com/containerd/containerd/cio"
"github.com/containerd/containerd/content"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/images"
"github.com/containerd/containerd/linux/runctypes"
"github.com/containerd/typeurl"
Expand All @@ -42,7 +43,7 @@ import (
const InitProcessName = "init"

type container struct {
sync.Mutex
mu sync.Mutex

bundleDir string
ctr containerd.Container
Expand All @@ -51,6 +52,54 @@ type container struct {
oomKilled bool
}

func (c *container) setTask(t containerd.Task) {
c.mu.Lock()
c.task = t
c.mu.Unlock()
}

func (c *container) getTask() containerd.Task {
c.mu.Lock()
t := c.task
c.mu.Unlock()
return t
}

func (c *container) addProcess(id string, p containerd.Process) {
c.mu.Lock()
if c.execs == nil {
c.execs = make(map[string]containerd.Process)
}
c.execs[id] = p
c.mu.Unlock()
}

func (c *container) deleteProcess(id string) {
c.mu.Lock()
delete(c.execs, id)
c.mu.Unlock()
}

func (c *container) getProcess(id string) containerd.Process {
c.mu.Lock()
p := c.execs[id]
c.mu.Unlock()
return p
}

func (c *container) setOOMKilled(killed bool) {
c.mu.Lock()
c.oomKilled = killed
c.mu.Unlock()
}

func (c *container) getOOMKilled() bool {
c.mu.Lock()
killed := c.oomKilled
c.mu.Unlock()
return killed
}

type client struct {
sync.RWMutex // protects containers map

Expand Down Expand Up @@ -160,10 +209,10 @@ func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, run
// Start create and start a task for the specified containerd id
func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin bool, attachStdio StdioCallback) (int, error) {
ctr := c.getContainer(id)
switch {
case ctr == nil:
if ctr == nil {
return -1, errors.WithStack(newNotFoundError("no such container"))
case ctr.task != nil:
}
if t := ctr.getTask(); t != nil {
return -1, errors.WithStack(newConflictError("container already started"))
}

Expand Down Expand Up @@ -227,9 +276,7 @@ func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin
return -1, err
}

c.Lock()
c.containers[id].task = t
c.Unlock()
ctr.setTask(t)

// Signal c.createIO that it can call CloseIO
close(stdinCloseSync)
Expand All @@ -239,9 +286,7 @@ func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin
c.logger.WithError(err).WithField("container", id).
Error("failed to delete task after fail start")
}
c.Lock()
c.containers[id].task = nil
c.Unlock()
ctr.setTask(nil)
return -1, err
}

Expand All @@ -250,12 +295,15 @@ func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin

func (c *client) Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio StdioCallback) (int, error) {
ctr := c.getContainer(containerID)
switch {
case ctr == nil:
if ctr == nil {
return -1, errors.WithStack(newNotFoundError("no such container"))
case ctr.task == nil:
}
t := ctr.getTask()
if t == nil {
return -1, errors.WithStack(newInvalidParameterError("container is not running"))
case ctr.execs != nil && ctr.execs[processID] != nil:
}

if p := ctr.getProcess(processID); p != nil {
return -1, errors.WithStack(newConflictError("id already in use"))
}

Expand All @@ -278,7 +326,7 @@ func (c *client) Exec(ctx context.Context, containerID, processID string, spec *
}
}()

p, err = ctr.task.Exec(ctx, processID, spec, func(id string) (cio.IO, error) {
p, err = t.Exec(ctx, processID, spec, func(id string) (cio.IO, error) {
rio, err = c.createIO(fifos, containerID, processID, stdinCloseSync, attachStdio)
return rio, err
})
Expand All @@ -291,21 +339,14 @@ func (c *client) Exec(ctx context.Context, containerID, processID string, spec *
return -1, err
}

ctr.Lock()
if ctr.execs == nil {
ctr.execs = make(map[string]containerd.Process)
}
ctr.execs[processID] = p
ctr.Unlock()
ctr.addProcess(processID, p)

// Signal c.createIO that it can call CloseIO
close(stdinCloseSync)

if err = p.Start(ctx); err != nil {
p.Delete(context.Background())
ctr.Lock()
delete(ctr.execs, processID)
ctr.Unlock()
ctr.deleteProcess(processID)
return -1, err
}

Expand All @@ -317,7 +358,7 @@ func (c *client) SignalProcess(ctx context.Context, containerID, processID strin
if err != nil {
return err
}
return p.Kill(ctx, syscall.Signal(signal))
return wrapError(p.Kill(ctx, syscall.Signal(signal)))
}

func (c *client) ResizeTerminal(ctx context.Context, containerID, processID string, width, height int) error {
Expand Down Expand Up @@ -431,12 +472,9 @@ func (c *client) DeleteTask(ctx context.Context, containerID string) (uint32, ti
return 255, time.Now(), nil
}

c.Lock()
if ctr, ok := c.containers[containerID]; ok {
ctr.task = nil
if ctr := c.getContainer(containerID); ctr != nil {
ctr.setTask(nil)
}
c.Unlock()

return status.ExitCode(), status.ExitTime(), nil
}

Expand Down Expand Up @@ -470,7 +508,12 @@ func (c *client) Status(ctx context.Context, containerID string) (Status, error)
return StatusUnknown, errors.WithStack(newNotFoundError("no such container"))
}

s, err := ctr.task.Status(ctx)
t := ctr.getTask()
if t == nil {
return StatusUnknown, errors.WithStack(newNotFoundError("no such task"))
}

s, err := t.Status(ctx)
if err != nil {
return StatusUnknown, err
}
Expand Down Expand Up @@ -546,26 +589,22 @@ func (c *client) removeContainer(id string) {

func (c *client) getProcess(containerID, processID string) (containerd.Process, error) {
ctr := c.getContainer(containerID)
switch {
case ctr == nil:
if ctr == nil {
return nil, errors.WithStack(newNotFoundError("no such container"))
case ctr.task == nil:
}

t := ctr.getTask()
if t == nil {
return nil, errors.WithStack(newNotFoundError("container is not running"))
case processID == InitProcessName:
return ctr.task, nil
default:
ctr.Lock()
defer ctr.Unlock()
if ctr.execs == nil {
return nil, errors.WithStack(newNotFoundError("no execs"))
}
}
if processID == InitProcessName {
return t, nil
}

p := ctr.execs[processID]
p := ctr.getProcess(processID)
if p == nil {
return nil, errors.WithStack(newNotFoundError("no such exec"))
}

return p, nil
}

Expand Down Expand Up @@ -623,12 +662,7 @@ func (c *client) processEvent(ctr *container, et EventType, ei EventInfo) {
}

if et == EventExit && ei.ProcessID != ei.ContainerID {
var p containerd.Process
ctr.Lock()
if ctr.execs != nil {
p = ctr.execs[ei.ProcessID]
}
ctr.Unlock()
p := ctr.getProcess(ei.ProcessID)
if p == nil {
c.logger.WithError(errors.New("no such process")).
WithFields(logrus.Fields{
Expand All @@ -644,9 +678,8 @@ func (c *client) processEvent(ctr *container, et EventType, ei EventInfo) {
"process": ei.ProcessID,
}).Warn("failed to delete process")
}
c.Lock()
delete(ctr.execs, ei.ProcessID)
c.Unlock()
ctr.deleteProcess(ei.ProcessID)

ctr := c.getContainer(ei.ContainerID)
if ctr == nil {
c.logger.WithFields(logrus.Fields{
Expand Down Expand Up @@ -783,10 +816,10 @@ func (c *client) processEventStream(ctx context.Context) {
}

if oomKilled {
ctr.oomKilled = true
ctr.setOOMKilled(true)
oomKilled = false
}
ei.OOMKilled = ctr.oomKilled
ei.OOMKilled = ctr.getOOMKilled()

c.processEvent(ctr, et, ei)
}
Expand Down Expand Up @@ -816,12 +849,19 @@ func (c *client) writeContent(ctx context.Context, mediaType, ref string, r io.R
}

func wrapError(err error) error {
if err != nil {
msg := err.Error()
for _, s := range []string{"container does not exist", "not found", "no such container"} {
if strings.Contains(msg, s) {
return wrapNotFoundError(err)
}
if err == nil {
return nil
}

switch {
case errdefs.IsNotFound(err):
return wrapNotFoundError(err)
}

msg := err.Error()
for _, s := range []string{"container does not exist", "not found", "no such container"} {
if strings.Contains(msg, s) {
return wrapNotFoundError(err)
}
}
return err
Expand Down