WIP: Attempt at cordon but no drain #45

Closed · wants to merge 2 commits
Changes from 1 commit
18 changes: 18 additions & 0 deletions cmd/draino/draino.go
@@ -54,6 +54,8 @@ func main() {
 		drainBuffer = app.Flag("drain-buffer", "Minimum time between starting each drain. Nodes are always cordoned immediately.").Default(kubernetes.DefaultDrainBuffer.String()).Duration()
 		nodeLabels = app.Flag("node-label", "Only nodes with this label will be eligible for cordoning and draining. May be specified multiple times.").PlaceHolder("KEY=VALUE").StringMap()
 
+		noDrain = app.Flag("no-drain", "Do not drain nodes, only cordon them").Bool()
+
 		evictDaemonSetPods = app.Flag("evict-daemonset-pods", "Evict pods that were created by an extant DaemonSet.").Bool()
 		evictLocalStoragePods = app.Flag("evict-emptydir-pods", "Evict pods with local storage, i.e. with emptyDir volumes.").Bool()
 		evictUnreplicatedPods = app.Flag("evict-unreplicated-pods", "Evict pods that were not created by a replication controller.").Bool()
@@ -137,6 +139,22 @@ func main() {
 		}
 	}
 
+	if *noDrain {
+		h = cache.FilteringResourceEventHandler{
+			FilterFunc: kubernetes.NewNodeProcessed().Filter,
+			Handler: kubernetes.NewDrainingResourceEventHandler(
+				kubernetes.NewAPICordonDrainer(cs,
+					kubernetes.MaxGracePeriod(*maxGracePeriod),
+					kubernetes.EvictionHeadroom(*evictionHeadroom),
+					kubernetes.WithPodFilter(kubernetes.NewPodFilters(pf...)),
+					kubernetes.WithDrain(false),
+				),
+				kubernetes.NewEventRecorder(cs),
+				kubernetes.WithLogger(log),
+				kubernetes.WithDrainBuffer(*drainBuffer)),
+		}
+	}
+
 	sf := cache.FilteringResourceEventHandler{FilterFunc: kubernetes.NodeSchedulableFilter, Handler: h}
 	cf := cache.FilteringResourceEventHandler{FilterFunc: kubernetes.NewNodeConditionFilter(*conditions), Handler: sf}
 	lf := cache.FilteringResourceEventHandler{FilterFunc: kubernetes.NewNodeLabelFilter(*nodeLabels), Handler: cf}
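For context, a minimal, self-contained sketch of the flag semantics introduced above. The flag name and help text are taken from the diff; the application name, demo wiring, and print statements are illustrative only. Because kingpin boolean flags default to false, draining remains the default behaviour unless --no-drain is passed explicitly.

package main

import (
	"fmt"
	"os"

	"gopkg.in/alecthomas/kingpin.v2"
)

func main() {
	// Illustrative only: a stripped-down flag setup mirroring the new --no-drain flag.
	app := kingpin.New("no-drain-demo", "Demonstrates the --no-drain flag semantics.")
	noDrain := app.Flag("no-drain", "Do not drain nodes, only cordon them").Bool()
	kingpin.MustParse(app.Parse(os.Args[1:]))

	if *noDrain {
		// Mirrors the branch added to main() above: the event handler would be
		// rebuilt with kubernetes.WithDrain(false).
		fmt.Println("cordon-only mode: nodes are cordoned but never drained")
	} else {
		fmt.Println("default mode: nodes are cordoned and then drained")
	}
}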
59 changes: 37 additions & 22 deletions internal/kubernetes/drainer.go
@@ -35,6 +35,8 @@ const (
 	DefaultEvictionOverhead time.Duration = 30 * time.Second
 
 	kindDaemonSet = "DaemonSet"
+
+	DefaultWithDrain = true
 )
 
 type errTimeout struct{}
@@ -89,6 +91,7 @@ type APICordonDrainer struct {
 
 	maxGracePeriod   time.Duration
 	evictionHeadroom time.Duration
+	withDrain        bool
 }
 
 // SuppliedCondition defines the condition will be watched.
@@ -126,6 +129,13 @@ func WithPodFilter(f PodFilterFunc) APICordonDrainerOption {
 	}
 }
 
+// WithDrain determines if we're actually going to drain nodes
+func WithDrain(b bool) APICordonDrainerOption {
+	return func(d *APICordonDrainer) {
+		d.withDrain = b
+	}
+}
+
 // NewAPICordonDrainer returns a CordonDrainer that cordons and drains nodes via
 // the Kubernetes API.
 func NewAPICordonDrainer(c kubernetes.Interface, ao ...APICordonDrainerOption) *APICordonDrainer {
@@ -134,6 +144,7 @@ func NewAPICordonDrainer(c kubernetes.Interface, ao ...APICordonDrainerOption) *APICordonDrainer {
 		filter:           NewPodFilters(),
 		maxGracePeriod:   DefaultMaxGracePeriod,
 		evictionHeadroom: DefaultEvictionOverhead,
+		withDrain:        DefaultWithDrain,
 	}
 	for _, o := range ao {
 		o(d)
@@ -163,32 +174,36 @@ func (d *APICordonDrainer) Cordon(n *core.Node) error {
 
 // Drain the supplied node. Evicts the node of all but mirror and DaemonSet pods.
 func (d *APICordonDrainer) Drain(n *core.Node) error {
-	pods, err := d.getPods(n.GetName())
-	if err != nil {
-		return errors.Wrapf(err, "cannot get pods for node %s", n.GetName())
-	}
-
-	abort := make(chan struct{})
-	errs := make(chan error, 1)
-	for _, pod := range pods {
-		go d.evict(pod, abort, errs)
-	}
-	// This will _eventually_ abort evictions. Evictions may spend up to
-	// d.deleteTimeout() in d.awaitDeletion(), or 5 seconds in backoff before
-	// noticing they've been aborted.
-	defer close(abort)
-
-	deadline := time.After(d.deleteTimeout())
-	for range pods {
-		select {
-		case err := <-errs:
-			if err != nil {
-				return errors.Wrap(err, "cannot evict all pods")
-			}
-		case <-deadline:
-			return errors.Wrap(errTimeout{}, "timed out waiting for evictions to complete")
-		}
-	}
-	return nil
+	if d.withDrain {
+		pods, err := d.getPods(n.GetName())
+		if err != nil {
+			return errors.Wrapf(err, "cannot get pods for node %s", n.GetName())
+		}
+
+		abort := make(chan struct{})
+		errs := make(chan error, 1)
+		for _, pod := range pods {
+			go d.evict(pod, abort, errs)
+		}
+		// This will _eventually_ abort evictions. Evictions may spend up to
+		// d.deleteTimeout() in d.awaitDeletion(), or 5 seconds in backoff before
+		// noticing they've been aborted.
+		defer close(abort)
+
+		deadline := time.After(d.deleteTimeout())
+		for range pods {
+			select {
+			case err := <-errs:
+				if err != nil {
+					return errors.Wrap(err, "cannot evict all pods")
+				}
+			case <-deadline:
+				return errors.Wrap(errTimeout{}, "timed out waiting for evictions to complete")
+			}
+		}
+		return nil
+	}
+	// do nothing
+	return nil
 }

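To make the intended behaviour concrete, a hypothetical test-style sketch, not part of this PR: it assumes it sits in the internal/kubernetes package and uses client-go's fake clientset; the test name and node name are made up. With WithDrain(false) the node is still cordoned, but Drain returns immediately without listing or evicting pods.

package kubernetes

import (
	"testing"

	core "k8s.io/api/core/v1"
	meta "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes/fake"
)

// Hypothetical test exercising the cordon-but-no-drain path sketched in this PR.
func TestDrainSkippedWhenWithDrainFalse(t *testing.T) {
	node := &core.Node{ObjectMeta: meta.ObjectMeta{Name: "node-1"}}
	cs := fake.NewSimpleClientset(node)

	d := NewAPICordonDrainer(cs, WithDrain(false))

	// Cordoning happens regardless of the withDrain setting.
	if err := d.Cordon(node); err != nil {
		t.Fatalf("cordon: %v", err)
	}
	// With withDrain == false, Drain is a no-op that returns nil without evicting anything.
	if err := d.Drain(node); err != nil {
		t.Fatalf("drain: %v", err)
	}
}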