Skip to content

Commit

Permalink
[WIP] Mark operator as degraded if there are any pods in CrashLoopBac…
Browse files Browse the repository at this point in the history
…kOff state

This patch checks the statuses of the pods belonging to those deployments and daemonsets
which are in a "hung" state. If any of the pods are in the CrashLoopBackOff state,
the operator will be marked as degraded.

Signed-off-by: Surya Seetharaman <suryaseetharaman.9@gmail.com>
  • Loading branch information
tssurya committed Jun 15, 2020
1 parent bc4f0bd commit a10490e
Showing 1 changed file with 31 additions and 0 deletions.
31 changes: 31 additions & 0 deletions pkg/controller/statusmanager/pod_status.go
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/openshift/cluster-network-operator/pkg/names"

appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
Expand Down Expand Up @@ -91,6 +92,21 @@ func (status *StatusManager) SetFromPods() {
} else if ds.Status.NumberUnavailable > 0 {
progressing = append(progressing, fmt.Sprintf("DaemonSet %q is not available (awaiting %d nodes)", dsName.String(), ds.Status.NumberUnavailable))
dsProgressing = true
// Check for any pods in CrashLoopBackOff state and mark the operator as degraded if so.
pods := &v1.PodList{}
err := status.client.List(context.TODO(), pods, client.InNamespace(dsName.Namespace), client.MatchingLabels(ds.Spec.Selector.MatchLabels))
if err != nil {
log.Printf("Error getting pods from DaemonSet %q: %v", dsName.String(), err)
}
for _, pod := range pods.Items {
for _, container := range pod.Status.ContainerStatuses {
if container.State.Waiting != nil {
if container.State.Waiting.Reason == "CrashLoopBackOff" {
hung = append(hung, fmt.Sprintf("DaemonSet %q rollout is not making progress - pod %s is in CrashLoopBackOff State", dsName.String(), pod.Name))
}
}
}
}
} else if ds.Status.NumberAvailable == 0 { // NOTE: update this if we ever expect empty (unscheduled) daemonsets ~cdc
progressing = append(progressing, fmt.Sprintf("DaemonSet %q is not yet scheduled on any nodes", dsName.String()))
dsProgressing = true
Expand Down Expand Up @@ -138,6 +154,21 @@ func (status *StatusManager) SetFromPods() {
if dep.Status.UnavailableReplicas > 0 {
progressing = append(progressing, fmt.Sprintf("Deployment %q is not available (awaiting %d nodes)", depName.String(), dep.Status.UnavailableReplicas))
depProgressing = true
// Check for any pods in CrashLoopBackOff state and mark the operator as degraded if so.
pods := &v1.PodList{}
err := status.client.List(context.TODO(), pods, client.InNamespace(depName.Namespace), client.MatchingLabels(dep.Spec.Selector.MatchLabels))
if err != nil {
log.Printf("Error getting pods from deployment %q: %v", depName.String(), err)
}
for _, pod := range pods.Items {
for _, container := range pod.Status.ContainerStatuses {
if container.State.Waiting != nil {
if container.State.Waiting.Reason == "CrashLoopBackOff"{
hung = append(hung, fmt.Sprintf("Deployment %q rollout is not making progress - pod %s is in CrashLoopBackOff State", depName.String(), pod.Name))
}
}
}
}
} else if dep.Status.AvailableReplicas == 0 {
progressing = append(progressing, fmt.Sprintf("Deployment %q is not yet scheduled on any nodes", depName.String()))
depProgressing = true
Expand Down

0 comments on commit a10490e

Please sign in to comment.