salt: Skip "Pending" pods when draining a node
In most cases it does not make sense to try to evict a Pod that is
Pending, since it is not running yet. Evicting Pending pods can also
cause trouble during MetalK8s expansions, as a Pod may be Pending on a
node that is not yet deployed (so the Pending pod cannot be evicted).

Let's skip "Pending" pods by default when draining a node in the
`deploy_node` orchestrate (used during expansions, upgrades and
downgrades).
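A minimal sketch of the new behavior (illustrative only; this standalone function condenses the `Drain.status_filter` method added in the diff below):

POD_STATUS_PENDING = "Pending"

def status_filter(pod, ignore_pending):
    """Return (is_deletable, warning) for a pod, based on its phase."""
    if pod["status"]["phase"] == POD_STATUS_PENDING and ignore_pending:
        return False, "Ignoring Pending pods"
    return True, ""

# With ignore_pending set, a Pending pod is reported as non-deletable,
# so the drain skips it instead of waiting for an eviction that cannot
# complete:
assert status_filter({"status": {"phase": "Pending"}}, True) == (False, "Ignoring Pending pods")
assert status_filter({"status": {"phase": "Running"}}, True) == (True, "")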
TeddyAndrieux committed Dec 15, 2021
1 parent 8a3e55d commit 60df8ec
Showing 4 changed files with 65 additions and 1 deletion.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,9 @@
# CHANGELOG
## Release 2.10.7 (in development)
## Enhancements

- Skip "Pending" pods when draining a node
  (PR [#3641](https://github.com/scality/metalk8s/pull/3641))

## Release 2.10.6
## Enhancements
22 changes: 22 additions & 0 deletions salt/_modules/metalk8s_drain.py
@@ -122,6 +122,7 @@ def _message_from_pods_dict(errors_dict):
# pod statuses
POD_STATUS_SUCCEEDED = "Succeeded"
POD_STATUS_FAILED = "Failed"
POD_STATUS_PENDING = "Pending"


class Drain(object):
@@ -136,6 +137,7 @@ class Drain(object):
    WARNING_MSG = {
        "daemonset": "Ignoring DaemonSet-managed pods",
        "localStorage": "Deleting pods with local storage",
        "pending": "Ignoring Pending pods",
        "unmanaged": (
            "Deleting pods not managed by ReplicationController, "
            "ReplicaSet, Job, DaemonSet or StatefulSet"
@@ -157,6 +159,7 @@ def __init__(
        force=False,
        grace_period=1,
        ignore_daemonset=False,
        ignore_pending=False,
        timeout=0,
        delete_local_data=False,
        best_effort=False,
@@ -166,6 +169,7 @@ def __init__(
        self._force = force
        self._grace_period = grace_period
        self._ignore_daemonset = ignore_daemonset
        self._ignore_pending = ignore_pending
        # Default timeout is 3600 seconds
        self._timeout = timeout or 3600
        self._delete_local_data = delete_local_data
@@ -176,6 +180,7 @@ def __init__(
    force = property(operator.attrgetter("_force"))
    grace_period = property(operator.attrgetter("_grace_period"))
    ignore_daemonset = property(operator.attrgetter("_ignore_daemonset"))
    ignore_pending = property(operator.attrgetter("_ignore_pending"))
    timeout = property(operator.attrgetter("_timeout"))
    delete_local_data = property(operator.attrgetter("_delete_local_data"))

@@ -269,6 +274,19 @@ def get_pod_controller(self, pod):
            )
        return controller_ref

    def status_filter(self, pod):
        """Compute eviction status for the pod according to its status.

        If the Pod is Pending and the `ignore_pending` option is set,
        the pod is skipped.

        Returns: (is_deletable: bool, warning: str)
        """
        if pod["status"]["phase"] == POD_STATUS_PENDING and self.ignore_pending:
            return False, self.WARNING_MSG["pending"]

        return True, ""

    def daemonset_filter(self, pod):
        """Compute eviction status for the pod according to DaemonSet kind.
@@ -326,6 +344,7 @@ def get_pods_for_eviction(self):
                self.localstorage_filter,
                self.unreplicated_filter,
                self.daemonset_filter,
                self.status_filter,
            ):
                try:
                    filter_deletable, warning = pod_filter(pod)
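Each filter in that tuple returns `(is_deletable, warning)`. A minimal sketch of the chain's assumed semantics (consistent with the FIXME note in the test file further down: every filter runs for every pod, so warnings can accumulate even for pods that end up skipped):

def is_evictable(pod, filters):
    """Illustrative reduction over the filter chain (not the actual code)."""
    deletable = True
    warnings = []
    for pod_filter in filters:
        filter_deletable, warning = pod_filter(pod)
        # A single False is enough to exclude the pod from eviction,
        # but the loop keeps going, collecting every warning on the way.
        deletable = deletable and filter_deletable
        if warning:
            warnings.append(warning)
    return deletable, warnings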
@@ -550,6 +569,7 @@ def node_drain(
    force=False,
    grace_period=1,
    ignore_daemonset=False,
    ignore_pending=False,
    timeout=0,
    delete_local_data=False,
    best_effort=False,
@@ -562,6 +582,7 @@
    - force : ignore unreplicated pods (i.e. StaticPod pods)
    - grace_period : eviction grace period
    - ignore_daemonset : ignore daemonsets in eviction process
    - ignore_pending : ignore pending pods
    - timeout : drain process timeout value
    - delete_local_data : force deletion for pods with local storage
    - best_effort : try to drain the node as much as possible but do not
@@ -579,6 +600,7 @@
        force=force,
        grace_period=grace_period,
        ignore_daemonset=ignore_daemonset,
        ignore_pending=ignore_pending,
        timeout=timeout,
        delete_local_data=delete_local_data,
        best_effort=best_effort,
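For illustration, draining with the new flag from another Salt module or runner might look like this (hypothetical call: it assumes the module is loaded as `metalk8s_drain` and takes the node name as its first argument, as the SLS state below suggests):

# Hypothetical invocation through Salt's loader:
result = __salt__["metalk8s_drain.node_drain"](
    "my-node",
    ignore_daemonset=True,
    ignore_pending=True,  # new in this commit: skip Pending pods
    delete_local_data=True,
    force=True,
)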
1 change: 1 addition & 0 deletions salt/metalk8s/orchestrate/deploy_node.sls
@@ -130,6 +130,7 @@ Drain the node:
  metalk8s_drain.node_drained:
    - name: {{ node_name }}
    - ignore_daemonset: True
    - ignore_pending: True
    - delete_local_data: True
    - force: True
{%- if pillar.orchestrate.get("drain_timeout") %}
39 changes: 38 additions & 1 deletion salt/tests/unit/modules/files/test_metalk8s_drain.yaml
@@ -154,13 +154,14 @@ drain:
    pods_to_evict:
      - my-finished-pod

  # All possible pods (ReplicaSet, DaemonSet, unmanaged, with local storage)
  # All possible pods (ReplicaSet, DaemonSet, unmanaged, Pending, with local storage)
  - node_name: my-node
    dataset: full
    # Add some args to evict the special pods
    force: true
    delete_local_data: true
    ignore_daemonset: true
    ignore_pending: true
    # DaemonSet-managed and static pods are ignored
    pods_to_evict:
      - my-pod
@@ -183,6 +184,27 @@
          Deleting pods not managed by ReplicationController, ReplicaSet,
          Job, DaemonSet or StatefulSet: my-static-pod

  # Pending pods (evicted by default)
  # FIXME: all filters are executed, and we see some incorrect warnings
  - &evict_filter_pending_pod
    node_name: my-node
    dataset: single-pending-pod
    pods_to_evict:
      - my-pending-pod
    log_lines:
      - level: WARNING
        contains: >-
          Deleting pods not managed by ReplicationController, ReplicaSet,
          Job, DaemonSet or StatefulSet: my-pending-pod

  # Pending pods (ignored, with a warning)
  - <<: *evict_filter_pending_pod
    ignore_pending: true
    pods_to_evict: []
    log_lines:
      - level: WARNING
        contains: "Ignoring Pending pods: my-pending-pod"

  # DaemonSet-managed pods (ignored, with a warning)
  - node_name: my-node
    dataset: single-daemonset
@@ -442,6 +464,15 @@ datasets:
      <<: *base_pod_status
      phase: Succeeded

  pending-pod: &pending_pod
    <<: *base_pod
    metadata:
      <<: *base_pod_meta
      name: my-pending-pod
    status:
      <<: *base_pod_status
      phase: Pending

  replicaset-pod: &replicaset_pod
    <<: *base_pod
    metadata: &replicaset_pod_meta
@@ -516,6 +547,11 @@ datasets:
    pods:
      - *static_pod

  single-pending-pod:
    <<: *empty_dataset
    pods:
      - *pending_pod

  finished-pod: &finished_pod_dataset
    <<: *empty_dataset
    pods:
@@ -587,6 +623,7 @@ datasets:
      - *base_pod
      - *replicaset_pod
      - *static_pod
      - *pending_pod
      - *local_storage_pod
      - *daemonset_pod
    replicasets:
