-
Notifications
You must be signed in to change notification settings - Fork 4
/
contour.go
97 lines (81 loc) · 2.83 KB
/
contour.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
package cluster
import (
"context"
"time"
multierror "github.com/hashicorp/go-multierror"
"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
)
const (
// DefaultContourNamespace is the namespace name "projectcontour".
// NOTE(review): presumably the default value for Config.ContourNamespace; the
// defaulting logic is not visible in this file — confirm against the config loader.
DefaultContourNamespace = "projectcontour"
// DefaultEnvoyPodsNotReadyDuration is five minutes, matching the documented default
// for how long envoy must have been not ready before shouldRestartEnvoyPod reports
// that the pod should be restarted.
// NOTE(review): where this default is applied to Config.EnvoyPodsNotReadyDuration
// is not visible in this file — confirm.
DefaultEnvoyPodsNotReadyDuration = 5 * time.Minute
)
// RestartFailedEnvoyPods forcefully deletes envoy pods that have been stuck in an
// unrecoverable state for at least Config.EnvoyPodsNotReadyDuration.
//
// It is a no-op (returning nil) when Config.RestartFailedEnvoyPods is false or
// Config.ContourNamespace is empty. Otherwise it lists the pods labelled app=envoy
// in the contour namespace and deletes, with a zero-second grace period, every pod
// for which shouldRestartEnvoyPod reports true.
//
// A failure to list pods is returned immediately. A failure to delete an individual
// pod does not stop the loop: delete errors are accumulated and returned together
// once every pod has been considered. The result is nil when nothing failed.
func (c *Controller) RestartFailedEnvoyPods(ctx context.Context) error {
	logger := c.Log.With("phase", "RestartFailedEnvoyPods")

	if !c.Config.RestartFailedEnvoyPods {
		logger.Debug("disabled, skipping")
		return nil
	}

	if c.Config.ContourNamespace == "" {
		logger.Debug("contour namespace not set, skipping")
		return nil
	}

	logger.Debug("reconciling failed envoy pods")

	podClient := c.Config.Client.CoreV1().Pods(c.Config.ContourNamespace)

	selector := labels.SelectorFromSet(map[string]string{"app": "envoy"})
	pods, err := podClient.List(ctx, metav1.ListOptions{
		LabelSelector: selector.String(),
	})
	if err != nil {
		return errors.Wrap(err, "list envoy pods")
	}

	// Kept as a plain error interface so that it stays a true nil when no delete
	// fails (a typed nil *multierror.Error would compare non-nil for callers).
	var multiErr error
	for i := range pods.Items {
		pod := &pods.Items[i]
		if !shouldRestartEnvoyPod(pod.Status, c.Config.EnvoyPodsNotReadyDuration) {
			continue
		}

		// Use a per-pod child logger. Reassigning the outer logger here would
		// stack a new "pod" field on every iteration, so later log lines would
		// carry the names of previously visited pods as well.
		podLogger := logger.With("pod", pod.Name)
		podLogger.Debug("forcefully deleting failed envoy pod")

		// Grace period 0 requests immediate (forceful) deletion.
		if err := podClient.Delete(ctx, pod.Name, *metav1.NewDeleteOptions(0)); err != nil {
			multiErr = multierror.Append(multiErr, errors.Wrapf(err, "forcefully deleting pod %s", pod.Name))
			continue
		}
		podLogger.Info("forcefully deleted failed envoy pod")
	}

	return multiErr
}
// shouldRestartEnvoyPod reports whether an envoy pod should be forcefully restarted.
// It returns true only when all of the following hold:
//   - the pod phase is Running
//   - the shutdown-manager container is ready while the envoy container is not
//     (containers that are not running, or that started less than notReadyDuration
//     ago, are treated as not ready)
//   - the pod's ContainersReady condition has been False with reason
//     "ContainersNotReady" for at least notReadyDuration (envoy_pods_not_ready_duration,
//     default 5 minutes)
func shouldRestartEnvoyPod(podStatus corev1.PodStatus, notReadyDuration time.Duration) bool {
	if podStatus.Phase != corev1.PodRunning {
		return false
	}

	// Anything that happened after the cutoff is too recent to act on.
	cutoff := time.Now().Add(-notReadyDuration)

	var shutdownManagerReady, envoyReady bool
	for i := range podStatus.ContainerStatuses {
		cs := &podStatus.ContainerStatuses[i]

		running := cs.State.Running
		if running == nil || running.StartedAt.Time.After(cutoff) {
			// Not running, or started too recently: leave its readiness as false.
			continue
		}

		if cs.Name == "shutdown-manager" {
			shutdownManagerReady = cs.Ready
		} else if cs.Name == "envoy" {
			envoyReady = cs.Ready
		}
	}

	if !shutdownManagerReady || envoyReady {
		return false
	}

	// The first matching ContainersReady condition decides the outcome.
	for _, cond := range podStatus.Conditions {
		containersNotReady := cond.Type == corev1.ContainersReady &&
			cond.Reason == "ContainersNotReady" &&
			cond.Status == corev1.ConditionFalse
		if containersNotReady {
			return cond.LastTransitionTime.Time.Before(cutoff)
		}
	}

	return false
}