diff --git a/hack/local-up-cluster.sh b/hack/local-up-cluster.sh
index e9cbe0f0a5b6..522eed63e7de 100755
--- a/hack/local-up-cluster.sh
+++ b/hack/local-up-cluster.sh
@@ -41,6 +41,11 @@ CGROUP_ROOT=${CGROUP_ROOT:-""}
 # name of the cgroup driver, i.e. cgroupfs or systemd
 CGROUP_DRIVER=${CGROUP_DRIVER:-""}
 
+# enables testing eviction scenarios locally.
+EVICTION_HARD=${EVICTION_HARD:-"memory.available<100Mi"}
+EVICTION_SOFT=${EVICTION_SOFT:-""}
+EVICTION_PRESSURE_TRANSITION_PERIOD=${EVICTION_PRESSURE_TRANSITION_PERIOD:-"1m"}
+
 # We disable cluster DNS by default because this script uses docker0 (or whatever
 # container bridge docker is currently using) and we don't know the IP of the
 # DNS pod to pass in as --cluster-dns. To set this up by hand, set this flag
@@ -545,6 +550,9 @@ function start_kubelet {
       --cgroup-driver=${CGROUP_DRIVER} \
       --cgroup-root=${CGROUP_ROOT} \
       --keep-terminated-pod-volumes=true \
+      --eviction-hard=${EVICTION_HARD} \
+      --eviction-soft=${EVICTION_SOFT} \
+      --eviction-pressure-transition-period=${EVICTION_PRESSURE_TRANSITION_PERIOD} \
       ${auth_args} \
       ${dns_args} \
       ${net_plugin_dir_args} \
diff --git a/pkg/kubelet/eviction/eviction_manager.go b/pkg/kubelet/eviction/eviction_manager.go
index 04d6e9a75142..01f23d2e7f4f 100644
--- a/pkg/kubelet/eviction/eviction_manager.go
+++ b/pkg/kubelet/eviction/eviction_manager.go
@@ -186,6 +186,8 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 		return
 	}
 
+	glog.V(3).Infof("eviction manager: synchronize housekeeping")
+
 	// build the ranking functions (if not yet known)
 	// TODO: have a function in cadvisor that lets us know if global housekeeping has completed
 	if len(m.resourceToRankFunc) == 0 || len(m.resourceToNodeReclaimFuncs) == 0 {
@@ -204,6 +206,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 		glog.Errorf("eviction manager: unexpected err: %v", err)
 		return
 	}
+	debugLogObservations("observations", observations)
 
 	// attempt to create a threshold notifier to improve eviction response time
 	if m.config.KernelMemcgNotification && !m.notifiersInitialized {
@@ -230,15 +233,18 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 
 	// determine the set of thresholds met independent of grace period
 	thresholds = thresholdsMet(thresholds, observations, false)
+	debugLogThresholdsWithObservation("thresholds - ignoring grace period", thresholds, observations)
 
 	// determine the set of thresholds previously met that have not yet satisfied the associated min-reclaim
 	if len(m.thresholdsMet) > 0 {
 		thresholdsNotYetResolved := thresholdsMet(m.thresholdsMet, observations, true)
 		thresholds = mergeThresholds(thresholds, thresholdsNotYetResolved)
 	}
+	debugLogThresholdsWithObservation("thresholds - reclaim not satisfied", thresholds, observations)
 
 	// determine the set of thresholds whose stats have been updated since the last sync
 	thresholds = thresholdsUpdatedStats(thresholds, observations, m.lastObservations)
+	debugLogThresholdsWithObservation("thresholds - updated stats", thresholds, observations)
 
 	// track when a threshold was first observed
 	now := m.clock.Now()
@@ -246,15 +252,22 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 
 	// the set of node conditions that are triggered by currently observed thresholds
 	nodeConditions := nodeConditions(thresholds)
+	if len(nodeConditions) > 0 {
+		glog.V(3).Infof("eviction manager: node conditions - observed: %v", nodeConditions)
+	}
 
 	// track when a node condition was last observed
 	nodeConditionsLastObservedAt := nodeConditionsLastObservedAt(nodeConditions, m.nodeConditionsLastObservedAt, now)
 
 	// node conditions report true if it has been observed within the transition period window
 	nodeConditions = nodeConditionsObservedSince(nodeConditionsLastObservedAt, m.config.PressureTransitionPeriod, now)
+	if len(nodeConditions) > 0 {
+		glog.V(3).Infof("eviction manager: node conditions - transition period not met: %v", nodeConditions)
+	}
 
 	// determine the set of thresholds we need to drive eviction behavior (i.e. all grace periods are met)
 	thresholds = thresholdsMetGracePeriod(thresholdsFirstObservedAt, now)
+	debugLogThresholdsWithObservation("thresholds - grace periods satisfied", thresholds, observations)
 
 	// update internal state
 	m.Lock()
diff --git a/pkg/kubelet/eviction/helpers.go b/pkg/kubelet/eviction/helpers.go
index b52a19ff58de..7d66878b44e7 100644
--- a/pkg/kubelet/eviction/helpers.go
+++ b/pkg/kubelet/eviction/helpers.go
@@ -694,6 +694,29 @@ func thresholdsMet(thresholds []Threshold, observations signalObservations, enfo
 	return results
 }
 
+// debugLogObservations logs the latest signal observations at verbosity 3.
+func debugLogObservations(logPrefix string, observations signalObservations) {
+	for k, v := range observations {
+		if !v.time.IsZero() {
+			glog.V(3).Infof("eviction manager: %v: signal=%v, available: %v, capacity: %v, time: %v", logPrefix, k, v.available, v.capacity, v.time)
+		} else {
+			glog.V(3).Infof("eviction manager: %v: signal=%v, available: %v, capacity: %v", logPrefix, k, v.available, v.capacity)
+		}
+	}
+}
+
+// debugLogThresholdsWithObservation logs each threshold alongside its matching observation at verbosity 3.
+func debugLogThresholdsWithObservation(logPrefix string, thresholds []Threshold, observations signalObservations) {
+	for i := range thresholds {
+		threshold := thresholds[i]
+		observed, found := observations[threshold.Signal]
+		if found {
+			quantity := getThresholdQuantity(threshold.Value, observed.capacity)
+			glog.V(3).Infof("eviction manager: %v: threshold [signal=%v, quantity=%v] observed %v", logPrefix, threshold.Signal, quantity, observed.available)
+		} else {
+			glog.V(3).Infof("eviction manager: %v: threshold [signal=%v] had no observation", logPrefix, threshold.Signal)
+		}
+	}
+}
+
 func thresholdsUpdatedStats(thresholds []Threshold, observations, lastObservations signalObservations) []Threshold {
 	results := []Threshold{}
 	for i := range thresholds {