lifecycle.go
package support

import (
	"fmt"
	"time"

	"github.com/golang/glog"

	kapi "github.com/GoogleCloudPlatform/kubernetes/pkg/api"
	kerrors "github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors"
	kclient "github.com/GoogleCloudPlatform/kubernetes/pkg/client"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/runtime"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/util/wait"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"

	deployapi "github.com/openshift/origin/pkg/deploy/api"
	deployutil "github.com/openshift/origin/pkg/deploy/util"
	namer "github.com/openshift/origin/pkg/util/namer"
)

// HookExecutor executes a deployment lifecycle hook.
type HookExecutor struct {
	// PodClient provides access to pods.
	PodClient HookExecutorPodClient
}

// Execute executes hook in the context of deployment. The label is used to
// distinguish the kind of hook (e.g. pre, post).
func (e *HookExecutor) Execute(hook *deployapi.LifecycleHook, deployment *kapi.ReplicationController, label string) error {
	var err error
	switch {
	case hook.ExecNewPod != nil:
		err = e.executeExecNewPod(hook, deployment, label)
	}
	if err == nil {
		return nil
	}
	// Retry failures are treated the same as Abort.
	switch hook.FailurePolicy {
	case deployapi.LifecycleHookFailurePolicyAbort, deployapi.LifecycleHookFailurePolicyRetry:
		return fmt.Errorf("Hook failed, aborting: %s", err)
	case deployapi.LifecycleHookFailurePolicyIgnore:
		glog.Infof("Hook failed, ignoring: %s", err)
		return nil
	default:
		return err
	}
}
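
// For illustration, a pre hook passed to Execute might look roughly like the
// following sketch. The ExecNewPodHook type name and the literal field values
// are assumptions based on how the hook is consumed below (exec.ContainerName,
// exec.Command, exec.Env), not definitions from this file, and executor and
// deployment are assumed to already be in scope:
//
//	hook := &deployapi.LifecycleHook{
//		FailurePolicy: deployapi.LifecycleHookFailurePolicyAbort,
//		ExecNewPod: &deployapi.ExecNewPodHook{
//			ContainerName: "app",
//			Command:       []string{"/bin/migrate-db"},
//			Env:           []kapi.EnvVar{{Name: "STAGE", Value: "pre"}},
//		},
//	}
//	err := executor.Execute(hook, deployment, "prehook")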

// executeExecNewPod executes an ExecNewPod hook by creating a new pod based on
// the hook parameters and deployment. The pod is then synchronously watched
// until the pod completes, and if the pod failed, an error is returned.
//
// The hook pod inherits the following from the container the hook refers to:
//
// * Environment (hook keys take precedence)
// * Working directory
// * Resources
func (e *HookExecutor) executeExecNewPod(hook *deployapi.LifecycleHook, deployment *kapi.ReplicationController, label string) error {
	// Build a pod spec from the hook config and deployment.
	podSpec, err := makeHookPod(hook, deployment, label)
	if err != nil {
		return err
	}

	// Try to create the pod.
	pod, err := e.PodClient.CreatePod(deployment.Namespace, podSpec)
	if err != nil {
		if !kerrors.IsAlreadyExists(err) {
			return fmt.Errorf("couldn't create lifecycle pod for %s: %v", deployutil.LabelForDeployment(deployment), err)
		}
	} else {
		glog.V(0).Infof("Created lifecycle pod %s for deployment %s", pod.Name, deployutil.LabelForDeployment(deployment))
	}

	stopChannel := make(chan struct{})
	defer close(stopChannel)
	nextPod := e.PodClient.PodWatch(pod.Namespace, pod.Name, pod.ResourceVersion, stopChannel)

	glog.V(0).Infof("Waiting for hook pod %s/%s to complete", pod.Namespace, pod.Name)
	for {
		pod := nextPod()
		switch pod.Status.Phase {
		case kapi.PodSucceeded:
			return nil
		case kapi.PodFailed:
			return fmt.Errorf("%s", pod.Status.Message)
		}
	}
}

// makeHookPod makes a pod spec from a hook and deployment.
func makeHookPod(hook *deployapi.LifecycleHook, deployment *kapi.ReplicationController, label string) (*kapi.Pod, error) {
	exec := hook.ExecNewPod

	var baseContainer *kapi.Container
	for _, container := range deployment.Spec.Template.Spec.Containers {
		if container.Name == exec.ContainerName {
			baseContainer = &container
			break
		}
	}
	if baseContainer == nil {
		return nil, fmt.Errorf("no container named '%s' found in deployment template", exec.ContainerName)
	}

	// Build a merged environment; hook environment takes precedence over base
	// container environment.
	envMap := map[string]string{}
	mergedEnv := []kapi.EnvVar{}
	for _, env := range baseContainer.Env {
		envMap[env.Name] = env.Value
	}
	for _, env := range exec.Env {
		envMap[env.Name] = env.Value
	}
	for k, v := range envMap {
		mergedEnv = append(mergedEnv, kapi.EnvVar{Name: k, Value: v})
	}

	// Inherit resources from the base container.
	resources := kapi.ResourceRequirements{}
	if err := kapi.Scheme.Convert(&baseContainer.Resources, &resources); err != nil {
		return nil, fmt.Errorf("couldn't clone ResourceRequirements: %v", err)
	}

	// Assigning to a variable since its address is required.
	maxDeploymentDurationSeconds := deployapi.MaxDeploymentDurationSeconds

	// Let the kubelet manage retries if requested.
	restartPolicy := kapi.RestartPolicyNever
	if hook.FailurePolicy == deployapi.LifecycleHookFailurePolicyRetry {
		restartPolicy = kapi.RestartPolicyOnFailure
	}

	pod := &kapi.Pod{
		ObjectMeta: kapi.ObjectMeta{
			Name: namer.GetPodName(deployment.Name, label),
			Annotations: map[string]string{
				deployapi.DeploymentAnnotation: deployment.Name,
			},
			Labels: map[string]string{
				deployapi.DeployerPodForDeploymentLabel: deployment.Name,
			},
		},
		Spec: kapi.PodSpec{
			Containers: []kapi.Container{
				{
					Name:       "lifecycle",
					Image:      baseContainer.Image,
					Command:    exec.Command,
					WorkingDir: baseContainer.WorkingDir,
					Env:        mergedEnv,
					Resources:  resources,
				},
			},
			ActiveDeadlineSeconds: &maxDeploymentDurationSeconds,
			// Setting the node selector on the hook pod so that it is created
			// on the same set of nodes as the deployment pods.
			NodeSelector:  deployment.Spec.Template.Spec.NodeSelector,
			RestartPolicy: restartPolicy,
		},
	}

	return pod, nil
}

// HookExecutorPodClient abstracts access to pods.
type HookExecutorPodClient interface {
	CreatePod(namespace string, pod *kapi.Pod) (*kapi.Pod, error)
	PodWatch(namespace, name, resourceVersion string, stopChannel chan struct{}) func() *kapi.Pod
}

// HookExecutorPodClientImpl is a pluggable HookExecutorPodClient.
type HookExecutorPodClientImpl struct {
	CreatePodFunc func(namespace string, pod *kapi.Pod) (*kapi.Pod, error)
	PodWatchFunc  func(namespace, name, resourceVersion string, stopChannel chan struct{}) func() *kapi.Pod
}

func (i *HookExecutorPodClientImpl) CreatePod(namespace string, pod *kapi.Pod) (*kapi.Pod, error) {
	return i.CreatePodFunc(namespace, pod)
}

func (i *HookExecutorPodClientImpl) PodWatch(namespace, name, resourceVersion string, stopChannel chan struct{}) func() *kapi.Pod {
	return i.PodWatchFunc(namespace, name, resourceVersion, stopChannel)
}
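
// For illustration, the pluggable client might be wired to a real
// kclient.Interface roughly as follows. This is a sketch only: the use of
// client.Pods(namespace).Create and the surrounding variable names are
// assumptions, not code from this file; NewPodWatch below matches the
// PodWatchFunc signature.
//
//	podClient := &HookExecutorPodClientImpl{
//		CreatePodFunc: func(namespace string, pod *kapi.Pod) (*kapi.Pod, error) {
//			return client.Pods(namespace).Create(pod)
//		},
//		PodWatchFunc: func(namespace, name, resourceVersion string, stopChannel chan struct{}) func() *kapi.Pod {
//			return NewPodWatch(client, namespace, name, resourceVersion, stopChannel)
//		},
//	}
//	executor := &HookExecutor{PodClient: podClient}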

// NewPodWatch creates a pod watching function which is backed by a
// FIFO/reflector pair. This avoids managing watches directly.
// The caller supplies the stop channel that shuts down the watch's reflector;
// it is the caller's responsibility to defer closing that channel to prevent
// leaking resources.
func NewPodWatch(client kclient.Interface, namespace, name, resourceVersion string, stopChannel chan struct{}) func() *kapi.Pod {
	fieldSelector, _ := fields.ParseSelector("metadata.name=" + name)
	podLW := &deployutil.ListWatcherImpl{
		ListFunc: func() (runtime.Object, error) {
			return client.Pods(namespace).List(labels.Everything(), fieldSelector)
		},
		WatchFunc: func(resourceVersion string) (watch.Interface, error) {
			return client.Pods(namespace).Watch(labels.Everything(), fieldSelector, resourceVersion)
		},
	}

	queue := cache.NewFIFO(cache.MetaNamespaceKeyFunc)
	cache.NewReflector(podLW, &kapi.Pod{}, queue, 1*time.Minute).RunUntil(stopChannel)

	return func() *kapi.Pod {
		obj := queue.Pop()
		return obj.(*kapi.Pod)
	}
}
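
// NewFirstContainerReady returns a FirstContainerReady acceptor configured
// with the given timeout and polling interval, using the provided client to
// list and watch the deployment's pods.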
func NewFirstContainerReady(kclient kclient.Interface, timeout time.Duration, interval time.Duration) *FirstContainerReady {
	return &FirstContainerReady{
		timeout:  timeout,
		interval: interval,
		podsForDeployment: func(deployment *kapi.ReplicationController) (*kapi.PodList, error) {
			selector := labels.Set(deployment.Spec.Selector).AsSelector()
			return kclient.Pods(deployment.Namespace).List(selector, fields.Everything())
		},
		getPodStore: func(namespace, name string) (cache.Store, chan struct{}) {
			sel, _ := fields.ParseSelector("metadata.name=" + name)
			store := cache.NewStore(cache.MetaNamespaceKeyFunc)
			lw := &deployutil.ListWatcherImpl{
				ListFunc: func() (runtime.Object, error) {
					return kclient.Pods(namespace).List(labels.Everything(), sel)
				},
				WatchFunc: func(resourceVersion string) (watch.Interface, error) {
					return kclient.Pods(namespace).Watch(labels.Everything(), sel, resourceVersion)
				},
			}
			stop := make(chan struct{})
			cache.NewReflector(lw, &kapi.Pod{}, store, 10*time.Second).RunUntil(stop)
			return store, stop
		},
	}
}
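
// FirstContainerReady is a deployment acceptance check which waits for the
// containers of a deployment's first pod to report readiness.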
type FirstContainerReady struct {
	podsForDeployment func(*kapi.ReplicationController) (*kapi.PodList, error)
	getPodStore       func(namespace, name string) (cache.Store, chan struct{})
	timeout           time.Duration
	interval          time.Duration
}
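
// Accept waits until every container defined in the spec of the deployment's
// pod has reported ready at least once, or the timeout elapses. Deployments
// whose replica count is not exactly one are accepted automatically.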
func (c *FirstContainerReady) Accept(deployment *kapi.ReplicationController) error {
	// For now, only validate the first replica.
	if deployment.Spec.Replicas != 1 {
		glog.Infof("automatically accepting deployment %s with %d replicas", deployutil.LabelForDeployment(deployment), deployment.Spec.Replicas)
		return nil
	}

	// Try and find the pod for the deployment.
	pods, err := c.podsForDeployment(deployment)
	if err != nil {
		return fmt.Errorf("couldn't get pods for deployment %s: %v", deployutil.LabelForDeployment(deployment), err)
	}
	if len(pods.Items) == 0 {
		return fmt.Errorf("no pods found for deployment %s", deployutil.LabelForDeployment(deployment))
	}

	// If we found multiple, use the first one and log a warning.
	// TODO: should finding multiple be an error?
	pod := &pods.Items[0]
	if len(pods.Items) > 1 {
		glog.Infof("Warning: more than one pod for deployment %s; basing canary check on the first pod '%s'", deployutil.LabelForDeployment(deployment), pod.Name)
	}

	// Make a pod store to poll and ensure it gets cleaned up.
	podStore, stopStore := c.getPodStore(pod.Namespace, pod.Name)
	defer close(stopStore)

	// Track container readiness based on those defined in the spec.
	observedContainers := map[string]bool{}
	for _, container := range pod.Spec.Containers {
		observedContainers[container.Name] = false
	}

	// Start checking for pod updates.
	glog.V(0).Infof("Waiting for pod %s/%s container readiness", pod.Namespace, pod.Name)
	err = wait.Poll(c.interval, c.timeout, func() (done bool, err error) {
		// Get the latest state of the pod.
		obj, exists, err := podStore.Get(pod)
		// Try again later on error or if the pod isn't available yet.
		if err != nil {
			glog.V(0).Infof("Error getting pod %s/%s to inspect container readiness: %v", pod.Namespace, pod.Name, err)
			return false, nil
		}
		if !exists {
			glog.V(0).Infof("Couldn't find pod %s/%s to inspect container readiness", pod.Namespace, pod.Name)
			return false, nil
		}

		// New pod state is available; update the observed ready status of any
		// containers.
		updatedPod := obj.(*kapi.Pod)
		for _, status := range updatedPod.Status.ContainerStatuses {
			// Ignore any containers which aren't defined in the deployment spec.
			if _, known := observedContainers[status.Name]; !known {
				glog.V(0).Infof("Ignoring readiness of container %s in pod %s/%s because it's not present in the pod spec", status.Name, pod.Namespace, pod.Name)
				continue
			}
			// The status of the container could be transient; we only care if it
			// was ever ready. If it was ready and then became not ready, we
			// consider it ready.
			if status.Ready {
				observedContainers[status.Name] = true
			}
		}

		// Check whether all containers have been observed as ready.
		allReady := true
		for _, ready := range observedContainers {
			if !ready {
				allReady = false
				break
			}
		}

		// If all containers have been ready once, return success.
		if allReady {
			glog.V(0).Infof("All containers ready for %s/%s", pod.Namespace, pod.Name)
			return true, nil
		}

		// Otherwise, try again later.
		glog.V(4).Infof("Still waiting for pod %s/%s container readiness; observed statuses: %#v", pod.Namespace, pod.Name, observedContainers)
		return false, nil
	})
	if err != nil {
		if err == wait.ErrWaitTimeout {
			return fmt.Errorf("timed out waiting for pod %s/%s containers to become ready", pod.Namespace, pod.Name)
		}
		return fmt.Errorf("pod %s/%s failed readiness check: %v", pod.Namespace, pod.Name, err)
	}
	return nil
}