Skip to content

Commit

Permalink
add not-ready reap logic and unit test (keikoproj#58)
Browse files Browse the repository at this point in the history
* add not-ready reap logic and unit test

Signed-off-by: sbadla1 <sahil_badla@intuit.com>
Signed-off-by: shaoxt <shaoxt@gmail.com>
  • Loading branch information
sbadla1 authored and shaoxt committed Dec 21, 2021
1 parent 889536f commit d410263
Show file tree
Hide file tree
Showing 4 changed files with 240 additions and 42 deletions.
3 changes: 3 additions & 0 deletions cmd/governor/app/pdbreaper.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,7 @@ func init() {
pdbReaperCmd.Flags().BoolVar(&pdbReaperArgs.AllCrashLoop, "all-crashloop", true, "Only deletes PDBs for crashlooping pods when all pods are in crashloop")
pdbReaperCmd.Flags().IntVar(&pdbReaperArgs.CrashLoopRestartCount, "crashloop-restart-count", 5, "Minimum restart count to when considering pods in crashloop")
pdbReaperCmd.Flags().StringSliceVar(&pdbReaperArgs.ExcludedNamespaces, "excluded-namespaces", []string{}, "Namespaces excluded from scanning")
pdbReaperCmd.Flags().BoolVar(&pdbReaperArgs.ReapNotReady, "reap-not-ready", true, "Deletes PDBs which have pods in not-ready state")
pdbReaperCmd.Flags().IntVar(&pdbReaperArgs.ReapNotReadyThreshold, "not-ready-threshold-seconds", 1800, "Minimum seconds to wait when considering pods in not-ready state")
pdbReaperCmd.Flags().BoolVar(&pdbReaperArgs.AllNotReady, "all-not-ready", false, "Only deletes PDBs for not-ready pods when all pods are in not-ready state")
}
53 changes: 49 additions & 4 deletions pkg/reaper/pdbreaper/pdbreaper.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,17 @@ var log = logrus.New()
// Reason strings and event message formats used by pdb-reaper.
const (
// ReasonCrashLoopBackOff is the reason string for the CrashLoopBackOff state.
ReasonCrashLoopBackOff = "CrashLoopBackOff"

// Event reason identifiers.
// NOTE(review): the next four names are declared twice in this listing (here
// and again immediately below); this looks like the old and new sides of a
// diff rendered together — confirm against the real file.
EventReasonPodDisruptionBudgetDeleted = "PodDisruptionBudgetDeleted"
EventReasonBlockingDetected = "BlockingPodDisruptionBudget"
EventReasonMultipleDetected = "MultiplePodDisruptionBudgets"
EventReasonBlockingCrashLoopDetected = "BlockingPodDisruptionBudgetWithCrashLoop"
EventReasonPodDisruptionBudgetDeleted = "PodDisruptionBudgetDeleted"
EventReasonBlockingDetected = "BlockingPodDisruptionBudget"
EventReasonMultipleDetected = "MultiplePodDisruptionBudgets"
EventReasonBlockingCrashLoopDetected = "BlockingPodDisruptionBudgetWithCrashLoop"
// EventReasonBlockingNotReadyStateDetected is the reason passed to publishEvent
// when a PDB is marked reapable because its targeted pods are not ready.
EventReasonBlockingNotReadyStateDetected = "BlockingPodDisruptionBudgetWithNotReadyState"

// Event message format strings. Each contains a single %v verb placed after
// the words "The PodDisruptionBudget".
EventMessageDeletedFmt = "The PodDisruptionBudget %v has been deleted by pdb-reaper due to violation"
EventMessageBlockingFmt = "The PodDisruptionBudget %v has been marked for deletion due to misconfiguration/not allowing disruptions"
EventMessageMultipleFmt = "The PodDisruptionBudget %v has been marked for deletion due to multiple budgets targeting same pods"
EventMessageCrashLoopFmt = "The PodDisruptionBudget %v has been marked for deletion due to pods in CrashLoopBackOff blocking disruptions"
// EventMessageNotReadyFmt is published together with
// EventReasonBlockingNotReadyStateDetected.
EventMessageNotReadyFmt = "The PodDisruptionBudget %v has been marked for deletion due to pods in not-ready blocking disruptions"
)

// Run is the main runner function for pdb-reaper, and will initialize and start the pdb-reaper
Expand Down Expand Up @@ -227,6 +229,17 @@ func (ctx *ReaperContext) handleBlockingDisruptionBudgets() error {
}
}
}

if ctx.ReapNotReady {
if notReady := isPodsInNotReadyState(pods, ctx.ReapNotReadyThreshold, ctx.AllNotReady); notReady {
log.Infof("PDB %v is marked reapable due to targeted pods in not-ready state: %+v", pdbNamespacedName(pdb), podSliceNamespacedNames(pods))
ctx.addReapablePodDisruptionBudget(pdb)
err = ctx.publishEvent(pdb, EventReasonBlockingNotReadyStateDetected, EventMessageNotReadyFmt)
if err != nil {
log.Warnf(err.Error())
}
}
}
}
}
return nil
Expand Down Expand Up @@ -409,3 +422,35 @@ func isPodsInCrashloop(pods []corev1.Pod, threshold int, allPods bool) bool {
}
return false
}

// isPodsInNotReadyState reports whether the given pods are considered stuck in
// a not-ready state.
//
// A pod counts as not-ready when it carries a ContainersReady condition with
// status False for which isPodReadinessThresholdPast returns true given the
// condition's LastTransitionTime and thresholdSeconds. When allPods is true,
// every pod must be not-ready; otherwise a single not-ready pod is enough.
// An empty pod list is never reported as not-ready.
func isPodsInNotReadyState(pods []corev1.Pod, thresholdSeconds int, allPods bool) bool {
	// Without this guard the allPods comparison below is vacuously true
	// (0 == 0), so a budget that selects no pods would be reported as having
	// all of its pods not-ready.
	if len(pods) == 0 {
		return false
	}

	var notReadyCount int
	for _, pod := range pods {
		for _, condition := range pod.Status.Conditions {
			if condition.Type != corev1.ContainersReady || condition.Status != corev1.ConditionFalse {
				continue
			}
			if isPodReadinessThresholdPast(condition.LastTransitionTime, thresholdSeconds) {
				// Count each pod at most once.
				notReadyCount++
				break
			}
		}
	}

	if allPods {
		return notReadyCount == len(pods)
	}
	return notReadyCount > 0
}

// isPodReadinessThresholdPast reports whether at least thresholdSeconds seconds
// have elapsed between startTime and the current time.
func isPodReadinessThresholdPast(startTime metav1.Time, thresholdSeconds int) bool {
	threshold := time.Duration(thresholdSeconds) * time.Second
	elapsed := time.Since(startTime.Time)
	return elapsed >= threshold
}
Loading

0 comments on commit d410263

Please sign in to comment.