forked from containerd/cri
-
Notifications
You must be signed in to change notification settings - Fork 0
/
container_stop.go
166 lines (146 loc) · 5.73 KB
/
container_stop.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"fmt"
"time"
"github.com/containerd/containerd/api/services/execution"
"github.com/docker/docker/pkg/signal"
"github.com/golang/glog"
"golang.org/x/net/context"
"golang.org/x/sys/unix"
"k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
"github.com/kubernetes-incubator/cri-containerd/pkg/metadata"
)
const (
// stopCheckPollInterval is the interval at which waitContainerStop polls the
// container store to check whether a container has stopped.
stopCheckPollInterval = 100 * time.Millisecond
// killContainerTimeout is how long stopContainer waits for the container to
// be observed as stopped after SIGKILL has been sent.
killContainerTimeout = 2 * time.Minute
)
// StopContainer handles a StopContainer request: it looks the container up in
// the container store and stops it, treating the request timeout (in seconds)
// as the grace period.
//
// An error is returned when the container cannot be found in the store or
// when stopping it fails; on success an empty response is returned.
func (c *criContainerdService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (retRes *runtime.StopContainerResponse, retErr error) {
	containerID := r.GetContainerId()
	glog.V(2).Infof("StopContainer for %q with timeout %d (s)", containerID, r.GetTimeout())
	// Only the successful outcome is logged here; failures are reported to the
	// caller through the returned error.
	defer func() {
		if retErr != nil {
			return
		}
		glog.V(2).Infof("StopContainer %q returns successfully", containerID)
	}()

	// Fetch the container metadata from the container store.
	container, lookupErr := c.containerStore.Get(containerID)
	if lookupErr != nil {
		return nil, fmt.Errorf("an error occurred when try to find container %q: %v", containerID, lookupErr)
	}

	// The request carries the grace period as a number of seconds.
	gracePeriod := time.Duration(r.GetTimeout()) * time.Second
	if stopErr := c.stopContainer(ctx, container, gracePeriod); stopErr != nil {
		return nil, stopErr
	}
	return &runtime.StopContainerResponse{}, nil
}
// stopContainer stops a container based on the container metadata.
//
// It returns nil without doing anything unless the container is in the
// running state. With a positive timeout, the stop signal configured in the
// container's image (SIGTERM when the image does not configure one) is sent
// with KillRequest_All set, and the container is given up to timeout to be
// observed as stopped. If it is still running after that, or if timeout is not
// positive, SIGKILL is sent and the container is given killContainerTimeout to
// be observed as stopped.
//
// If ctx is already done when the graceful wait gives up, an error carrying
// ctx.Err() is returned and no SIGKILL is attempted: the failure is a
// cancelled request rather than an exhausted grace period, and any further
// call made with the same context could not succeed anyway.
func (c *criContainerdService) stopContainer(ctx context.Context, meta *metadata.ContainerMetadata, timeout time.Duration) error {
	id := meta.ID

	// Return without error if container is not running. This makes sure that
	// stop only takes real action after the container is started.
	if meta.State() != runtime.ContainerState_CONTAINER_RUNNING {
		glog.V(2).Infof("Container to stop %q is not running, current state %q",
			id, criContainerStateToString(meta.State()))
		return nil
	}

	// killAll sends sig through the task service with KillRequest_All set.
	// "Not found" and "process already finished" errors are treated as
	// success so that the caller moves on to make sure the container status
	// is updated.
	killAll := func(sig uint32) error {
		_, err := c.taskService.Kill(ctx, &execution.KillRequest{
			ContainerID: id,
			Signal:      sig,
			PidOrAll:    &execution.KillRequest_All{All: true},
		})
		if err != nil && !isContainerdGRPCNotFoundError(err) && !isRuncProcessAlreadyFinishedError(err) {
			return err
		}
		return nil
	}

	if timeout > 0 {
		stopSignal := unix.SIGTERM
		imageMeta, err := c.imageMetadataStore.Get(meta.ImageRef)
		if err != nil {
			// NOTE(random-liu): It's possible that the container is stopped,
			// deleted and image is garbage collected before this point. However,
			// the chance is really slim, even it happens, it's still fine to return
			// an error here.
			return fmt.Errorf("failed to get image metadata %q: %v", meta.ImageRef, err)
		}
		if imageMeta.Config.StopSignal != "" {
			stopSignal, err = signal.ParseSignal(imageMeta.Config.StopSignal)
			if err != nil {
				return fmt.Errorf("failed to parse stop signal %q: %v",
					imageMeta.Config.StopSignal, err)
			}
		}
		glog.V(2).Infof("Stop container %q with signal %v", id, stopSignal)
		if err = killAll(uint32(stopSignal)); err != nil {
			return fmt.Errorf("failed to stop container %q: %v", id, err)
		}

		err = c.waitContainerStop(ctx, id, timeout)
		if err == nil {
			return nil
		}
		// Distinguish "the request was cancelled / its deadline passed" from
		// "the grace period ran out". Only the latter should escalate to
		// SIGKILL; with a finished context the escalation below would just
		// fail with a less accurate error.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return fmt.Errorf("stop container %q is cancelled: %v (%v)", id, ctxErr, err)
		}
		glog.Errorf("Stop container %q timed out: %v", id, err)
	}

	// Event handler will Delete the container from containerd after it handles the Exited event.
	glog.V(2).Infof("Kill container %q", id)
	if err := killAll(uint32(unix.SIGKILL)); err != nil {
		return fmt.Errorf("failed to kill container %q: %v", id, err)
	}

	// Wait for a fixed timeout until container stop is observed by event monitor.
	if err := c.waitContainerStop(ctx, id, killContainerTimeout); err != nil {
		return fmt.Errorf("an error occurs during waiting for container %q to stop: %v", id, err)
	}
	return nil
}
// waitContainerStop polls the container store every stopCheckPollInterval
// until the container is stopped, timeout elapses, or ctx is done.
//
// It returns nil when the container is in the exited state or no longer exists
// in the store. It returns an error when the store lookup fails for any other
// reason, when timeout elapses first, or when ctx is done first; in the last
// case the error includes ctx.Err() so that cancellation can be told apart
// from an expired deadline.
func (c *criContainerdService) waitContainerStop(ctx context.Context, id string, timeout time.Duration) error {
	ticker := time.NewTicker(stopCheckPollInterval)
	defer ticker.Stop()
	timeoutTimer := time.NewTimer(timeout)
	defer timeoutTimer.Stop()
	for {
		// Poll once before waiting for stopCheckPollInterval.
		meta, err := c.containerStore.Get(id)
		if err != nil {
			if metadata.IsNotExistError(err) {
				// Do not return error here because container was removed means
				// it is already stopped.
				glog.Warningf("Container %q was removed during stopping", id)
				return nil
			}
			return fmt.Errorf("failed to get container %q metadata: %v", id, err)
		}
		// TODO(random-liu): Use channel with event handler instead of polling.
		if meta.State() == runtime.ContainerState_CONTAINER_EXITED {
			return nil
		}
		select {
		case <-ctx.Done():
			return fmt.Errorf("wait container %q is cancelled: %v", id, ctx.Err())
		case <-timeoutTimer.C:
			return fmt.Errorf("wait container %q stop timeout", id)
		case <-ticker.C:
			// Next poll.
		}
	}
}