Skip to content

Commit

Permalink
Merge pull request #41227 from cpuguy83/work_around_missing_shim_event
Browse files Browse the repository at this point in the history
Work around missing shim event
  • Loading branch information
thaJeztah committed Jul 30, 2020
2 parents 47b7c88 + c458bca commit 07746cc
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 66 deletions.
1 change: 1 addition & 0 deletions daemon/kill.go
Expand Up @@ -99,6 +99,7 @@ func (daemon *Daemon) killWithSignal(container *containerpkg.Container, sig int)
if errdefs.IsNotFound(err) {
unpause = false
logrus.WithError(err).WithField("container", container.ID).WithField("action", "kill").Debug("container kill failed because of 'container not found' or 'no such process'")
go daemon.handleContainerExit(container, nil)
} else {
return errors.Wrapf(err, "Cannot kill container %s", container.ID)
}
Expand Down
143 changes: 77 additions & 66 deletions daemon/monitor.go
Expand Up @@ -24,6 +24,82 @@ func (daemon *Daemon) setStateCounter(c *container.Container) {
}
}

func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontainerdtypes.EventInfo) error {
c.Lock()

ec, et, err := daemon.containerd.DeleteTask(context.Background(), c.ID)
if err != nil {
logrus.WithError(err).Warnf("failed to delete container %s from containerd", c.ID)
}

ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
c.StreamConfig.Wait(ctx)
cancel()
c.Reset(false)

exitStatus := container.ExitStatus{
ExitCode: int(ec),
ExitedAt: et,
}
if e != nil {
exitStatus.ExitCode = int(e.ExitCode)
exitStatus.ExitedAt = e.ExitedAt
exitStatus.OOMKilled = e.OOMKilled
if e.Error != nil {
c.SetError(e.Error)
}
}

restart, wait, err := c.RestartManager().ShouldRestart(ec, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt))
if err == nil && restart {
c.RestartCount++
c.SetRestarting(&exitStatus)
} else {
c.SetStopped(&exitStatus)
defer daemon.autoRemove(c)
}
defer c.Unlock() // needs to be called before autoRemove

// cancel healthcheck here, they will be automatically
// restarted if/when the container is started again
daemon.stopHealthchecks(c)
attributes := map[string]string{
"exitCode": strconv.Itoa(int(ec)),
}
daemon.LogContainerEventWithAttributes(c, "die", attributes)
daemon.Cleanup(c)
daemon.setStateCounter(c)
cpErr := c.CheckpointTo(daemon.containersReplica)

if err == nil && restart {
go func() {
err := <-wait
if err == nil {
// daemon.netController is initialized when daemon is restoring containers.
// But containerStart will use daemon.netController segment.
// So to avoid panic at startup process, here must wait util daemon restore done.
daemon.waitForStartupDone()
if err = daemon.containerStart(c, "", "", false); err != nil {
logrus.Debugf("failed to restart container: %+v", err)
}
}
if err != nil {
c.Lock()
c.SetStopped(&exitStatus)
daemon.setStateCounter(c)
c.CheckpointTo(daemon.containersReplica)
c.Unlock()
defer daemon.autoRemove(c)
if err != restartmanager.ErrRestartCanceled {
logrus.Errorf("restartmanger wait error: %+v", err)
}
}
}()
}

return cpErr
}

// ProcessEvent is called by libcontainerd whenever an event occurs
func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei libcontainerdtypes.EventInfo) error {
c, err := daemon.GetContainer(id)
Expand All @@ -48,72 +124,7 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
daemon.LogContainerEvent(c, "oom")
case libcontainerdtypes.EventExit:
if int(ei.Pid) == c.Pid {
c.Lock()
_, _, err := daemon.containerd.DeleteTask(context.Background(), c.ID)
if err != nil {
logrus.WithError(err).Warnf("failed to delete container %s from containerd", c.ID)
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
c.StreamConfig.Wait(ctx)
cancel()
c.Reset(false)

exitStatus := container.ExitStatus{
ExitCode: int(ei.ExitCode),
ExitedAt: ei.ExitedAt,
OOMKilled: ei.OOMKilled,
}
restart, wait, err := c.RestartManager().ShouldRestart(ei.ExitCode, daemon.IsShuttingDown() || c.HasBeenManuallyStopped, time.Since(c.StartedAt))
if err == nil && restart {
c.RestartCount++
c.SetRestarting(&exitStatus)
} else {
if ei.Error != nil {
c.SetError(ei.Error)
}
c.SetStopped(&exitStatus)
defer daemon.autoRemove(c)
}
defer c.Unlock() // needs to be called before autoRemove

// cancel healthcheck here, they will be automatically
// restarted if/when the container is started again
daemon.stopHealthchecks(c)
attributes := map[string]string{
"exitCode": strconv.Itoa(int(ei.ExitCode)),
}
daemon.LogContainerEventWithAttributes(c, "die", attributes)
daemon.Cleanup(c)
daemon.setStateCounter(c)
cpErr := c.CheckpointTo(daemon.containersReplica)

if err == nil && restart {
go func() {
err := <-wait
if err == nil {
// daemon.netController is initialized when daemon is restoring containers.
// But containerStart will use daemon.netController segment.
// So to avoid panic at startup process, here must wait util daemon restore done.
daemon.waitForStartupDone()
if err = daemon.containerStart(c, "", "", false); err != nil {
logrus.Debugf("failed to restart container: %+v", err)
}
}
if err != nil {
c.Lock()
c.SetStopped(&exitStatus)
daemon.setStateCounter(c)
c.CheckpointTo(daemon.containersReplica)
c.Unlock()
defer daemon.autoRemove(c)
if err != restartmanager.ErrRestartCanceled {
logrus.Errorf("restartmanger wait error: %+v", err)
}
}
}()
}

return cpErr
return daemon.handleContainerExit(c, &ei)
}

exitCode := 127
Expand Down

0 comments on commit 07746cc

Please sign in to comment.