Rename node status to node health in NodeLifecycleController #69305

Merged: 1 commit, Oct 2, 2018
131 changes: 67 additions & 64 deletions pkg/controller/nodelifecycle/node_lifecycle_controller.go
@@ -128,14 +128,14 @@ const (
)

const (
// The amount of time the nodecontroller should sleep between retrying NodeStatus updates
// The amount of time the nodecontroller should sleep between retrying node health updates
retrySleepTime = 20 * time.Millisecond
)

type nodeStatusData struct {
type nodeHealthData struct {
probeTimestamp metav1.Time
readyTransitionTimestamp metav1.Time
status v1.NodeStatus
status *v1.NodeStatus
Contributor:

The commit message is slightly misleading since you also changed this to a pointer.

Member Author:

Updated the commit message to include the pointer change now. :)

}
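
For context on the review thread above, here is a minimal sketch of the struct before and after this change. The field names come from the diff; the import paths are the usual k8s.io packages, and the rationale for the pointer (avoiding a copy of the full NodeStatus and allowing in-place updates of map entries) is an inference rather than something the PR spells out.

```go
package nodelifecycle

import (
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Before this PR: the controller kept a full value copy of the last observed NodeStatus.
type nodeStatusData struct {
	probeTimestamp           metav1.Time
	readyTransitionTimestamp metav1.Time
	status                   v1.NodeStatus
}

// After this PR: the map holds *nodeHealthData and the saved status is a pointer,
// so entries can be updated in place without copying the whole status struct.
type nodeHealthData struct {
	probeTimestamp           metav1.Time
	readyTransitionTimestamp metav1.Time
	status                   *v1.NodeStatus
}
```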

// Controller is the controller that manages node's life cycle.
@@ -155,8 +155,8 @@ type Controller struct {
computeZoneStateFunc func(nodeConditions []*v1.NodeCondition) (int, ZoneState)

knownNodeSet map[string]*v1.Node
// per Node map storing last observed Status together with a local time when it was observed.
nodeStatusMap map[string]nodeStatusData
// per Node map storing last observed health together with a local time when it was observed.
nodeHealthMap map[string]*nodeHealthData

// Lock to access evictor workers
evictorLock sync.Mutex
@@ -180,28 +180,31 @@ type Controller struct {
recorder record.EventRecorder

// Value controlling Controller monitoring period, i.e. how often does Controller
// check node status posted from kubelet. This value should be lower than nodeMonitorGracePeriod.
// TODO: Change node status monitor to watch based.
// check node health signal posted from kubelet. This value should be lower than
// nodeMonitorGracePeriod.
// TODO: Change node health monitor to watch based.
nodeMonitorPeriod time.Duration

// Value used if sync_nodes_status=False, only for node startup. When node
// is just created, e.g. cluster bootstrap or node creation, we give a longer grace period.
// When node is just created, e.g. cluster bootstrap or node creation, we give
// a longer grace period.
nodeStartupGracePeriod time.Duration

// Value used if sync_nodes_status=False. Controller will not proactively
// sync node status in this case, but will monitor node status updated from kubelet. If
// it doesn't receive update for this amount of time, it will start posting "NodeReady==
// ConditionUnknown". The amount of time before which Controller start evicting pods
// is controlled via flag 'pod-eviction-timeout'.
// Note: be cautious when changing the constant, it must work with nodeStatusUpdateFrequency
// in kubelet. There are several constraints:
// 1. nodeMonitorGracePeriod must be N times more than nodeStatusUpdateFrequency, where
// N means number of retries allowed for kubelet to post node status. It is pointless
// to make nodeMonitorGracePeriod be less than nodeStatusUpdateFrequency, since there
// will only be fresh values from Kubelet at an interval of nodeStatusUpdateFrequency.
// The constant must be less than podEvictionTimeout.
// 2. nodeMonitorGracePeriod can't be too large for user experience - larger value takes
// longer for user to see up-to-date node status.
// Controller will not proactively sync node health, but will monitor node
// health signal updated from kubelet. If it doesn't receive update for this
// amount of time, it will start posting "NodeReady==ConditionUnknown". The
// amount of time before which Controller starts evicting pods is controlled
// via flag 'pod-eviction-timeout'.
// Note: be cautious when changing the constant, it must work with
// nodeStatusUpdateFrequency in kubelet. There are several constraints:
// 1. nodeMonitorGracePeriod must be N times more than
// nodeStatusUpdateFrequency, where N means number of retries allowed for
// kubelet to post node health signal. It is pointless to make
// nodeMonitorGracePeriod be less than nodeStatusUpdateFrequency, since
// there will only be fresh values from Kubelet at an interval of
// nodeStatusUpdateFrequency. The constant must be less than
// podEvictionTimeout.
// 2. nodeMonitorGracePeriod can't be too large for user experience - larger
// value takes longer for user to see up-to-date node health.
nodeMonitorGracePeriod time.Duration

podEvictionTimeout time.Duration
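
To make the constraints documented in the comment above concrete, here is a small, self-contained sketch of a check that enforces them. validateNodeMonitorConfig is a hypothetical helper, not part of the controller, and the durations in main are illustrative rather than the project's actual defaults.

```go
package main

import (
	"fmt"
	"time"
)

// validateNodeMonitorConfig is a hypothetical helper (not part of the controller)
// that encodes the two constraints described in the comment above:
//  1. nodeMonitorGracePeriod should be at least N * nodeStatusUpdateFrequency,
//     where N is the number of retries the kubelet gets to post node health.
//  2. nodeMonitorGracePeriod must be less than podEvictionTimeout.
func validateNodeMonitorConfig(gracePeriod, updateFrequency, podEvictionTimeout time.Duration, retries int) error {
	if gracePeriod < time.Duration(retries)*updateFrequency {
		return fmt.Errorf("nodeMonitorGracePeriod %v is shorter than %d * nodeStatusUpdateFrequency (%v)",
			gracePeriod, retries, updateFrequency)
	}
	if gracePeriod >= podEvictionTimeout {
		return fmt.Errorf("nodeMonitorGracePeriod %v must be less than podEvictionTimeout %v",
			gracePeriod, podEvictionTimeout)
	}
	return nil
}

func main() {
	// Illustrative values only (not asserting the project's actual defaults):
	// 10s update frequency, 40s grace period, 5m eviction timeout, 4 retries.
	if err := validateNodeMonitorConfig(40*time.Second, 10*time.Second, 5*time.Minute, 4); err != nil {
		fmt.Println("config problem:", err)
		return
	}
	fmt.Println("node monitor config satisfies the documented constraints")
}
```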
@@ -259,7 +262,7 @@ func NewNodeLifecycleController(podInformer coreinformers.PodInformer,
kubeClient: kubeClient,
now: metav1.Now,
knownNodeSet: make(map[string]*v1.Node),
nodeStatusMap: make(map[string]nodeStatusData),
nodeHealthMap: make(map[string]*nodeHealthData),
nodeExistsInCloudProvider: func(nodeName types.NodeName) (bool, error) {
return nodeutil.ExistsInCloudProvider(cloud, nodeName)
},
@@ -419,10 +422,10 @@ func (nc *Controller) Run(stopCh <-chan struct{}) {
go wait.Until(nc.doEvictionPass, scheduler.NodeEvictionPeriod, stopCh)
}

// Incorporate the results of node status pushed from kubelet to master.
// Incorporate the results of node health signal pushed from kubelet to master.
go wait.Until(func() {
if err := nc.monitorNodeStatus(); err != nil {
glog.Errorf("Error monitoring node status: %v", err)
if err := nc.monitorNodeHealth(); err != nil {
glog.Errorf("Error monitoring node health: %v", err)
}
}, nc.nodeMonitorPeriod, stopCh)

@@ -608,10 +611,10 @@ func (nc *Controller) doEvictionPass() {
}
}

// monitorNodeStatus verifies node status are constantly updated by kubelet, and if not,
// post "NodeReady==ConditionUnknown". It also evicts all pods if node is not ready or
// not reachable for a long period of time.
func (nc *Controller) monitorNodeStatus() error {
// monitorNodeHealth verifies that node health is constantly updated by kubelet, and
// if not, post "NodeReady==ConditionUnknown". It also evicts all pods if node
// is not ready or not reachable for a long period of time.
func (nc *Controller) monitorNodeHealth() error {
// We are listing nodes from local cache as we can tolerate some small delays
// comparing to state from etcd and there is eventual consistency anyway.
nodes, err := nc.nodeLister.List(labels.Everything())
@@ -648,20 +651,20 @@ func (nc *Controller) monitorNodeStatus() error {
var observedReadyCondition v1.NodeCondition
var currentReadyCondition *v1.NodeCondition
node := nodes[i].DeepCopy()
if err := wait.PollImmediate(retrySleepTime, retrySleepTime*scheduler.NodeStatusUpdateRetry, func() (bool, error) {
gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeStatus(node)
if err := wait.PollImmediate(retrySleepTime, retrySleepTime*scheduler.NodeHealthUpdateRetry, func() (bool, error) {
gracePeriod, observedReadyCondition, currentReadyCondition, err = nc.tryUpdateNodeHealth(node)
if err == nil {
return true, nil
}
name := node.Name
node, err = nc.kubeClient.CoreV1().Nodes().Get(name, metav1.GetOptions{})
if err != nil {
glog.Errorf("Failed while getting a Node to retry updating NodeStatus. Probably Node %s was deleted.", name)
glog.Errorf("Failed while getting a Node to retry updating node health. Probably Node %s was deleted.", name)
return false, err
}
return false, nil
}); err != nil {
glog.Errorf("Update status of Node '%v' from Controller error: %v. "+
glog.Errorf("Update health of Node '%v' from Controller error: %v. "+
"Skipping - no pods will be evicted.", node.Name, err)
continue
}
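
The block above retries tryUpdateNodeHealth via wait.PollImmediate, re-fetching the Node between attempts, for at most NodeHealthUpdateRetry tries. A rough, self-contained sketch of that control flow follows; it uses plain Go with no client-go, pollImmediate only approximates the semantics of wait.PollImmediate, and the constants are illustrative.

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

const (
	retrySleepTime        = 20 * time.Millisecond
	nodeHealthUpdateRetry = 5 // illustrative; the real constant lives in the scheduler package
)

// pollImmediate approximates wait.PollImmediate: run cond once right away, then every
// interval, until it reports done, returns an error, or the timeout elapses.
func pollImmediate(interval, timeout time.Duration, cond func() (bool, error)) error {
	deadline := time.Now().Add(timeout)
	for {
		done, err := cond()
		if err != nil {
			return err
		}
		if done {
			return nil
		}
		if time.Now().After(deadline) {
			return errors.New("timed out waiting for the condition")
		}
		time.Sleep(interval)
	}
}

func main() {
	attempts := 0
	err := pollImmediate(retrySleepTime, retrySleepTime*nodeHealthUpdateRetry, func() (bool, error) {
		attempts++
		// Stand-in for tryUpdateNodeHealth: pretend it succeeds on the third attempt.
		if attempts >= 3 {
			return true, nil
		}
		// On failure the real controller re-fetches the Node from the API server here,
		// returning an error (and stopping the retries) only if that Get fails.
		return false, nil
	})
	fmt.Printf("attempts=%d err=%v\n", attempts, err)
}
```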
@@ -689,12 +692,12 @@ func (nc *Controller) monitorNodeStatus() error {
)
}
} else {
if decisionTimestamp.After(nc.nodeStatusMap[node.Name].readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
if decisionTimestamp.After(nc.nodeHealthMap[node.Name].readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
if nc.evictPods(node) {
glog.V(2).Infof("Node is NotReady. Adding Pods on Node %s to eviction queue: %v is later than %v + %v",
node.Name,
decisionTimestamp,
nc.nodeStatusMap[node.Name].readyTransitionTimestamp,
nc.nodeHealthMap[node.Name].readyTransitionTimestamp,
nc.podEvictionTimeout,
)
}
@@ -716,12 +719,12 @@ func (nc *Controller) monitorNodeStatus() error {
)
}
} else {
if decisionTimestamp.After(nc.nodeStatusMap[node.Name].probeTimestamp.Add(nc.podEvictionTimeout)) {
if decisionTimestamp.After(nc.nodeHealthMap[node.Name].probeTimestamp.Add(nc.podEvictionTimeout)) {
if nc.evictPods(node) {
glog.V(2).Infof("Node is unresponsive. Adding Pods on Node %s to eviction queues: %v is later than %v + %v",
node.Name,
decisionTimestamp,
nc.nodeStatusMap[node.Name].readyTransitionTimestamp,
nc.nodeHealthMap[node.Name].readyTransitionTimestamp,
nc.podEvictionTimeout-gracePeriod,
)
}
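
Both eviction branches above make the same kind of decision: compare the decision timestamp against a saved health timestamp plus podEvictionTimeout. A minimal sketch of that comparison, with an illustrative type standing in for the controller's own:

```go
package main

import (
	"fmt"
	"time"
)

// nodeHealth mirrors only the fields the eviction decision needs (illustrative, not
// the controller's actual type).
type nodeHealth struct {
	probeTimestamp           time.Time
	readyTransitionTimestamp time.Time
}

// shouldEvict sketches the comparison above: a node that has stayed NotReady (or
// unreachable) past podEvictionTimeout since its saved transition/probe timestamp
// gets its pods queued for eviction.
func shouldEvict(decisionTimestamp, saved time.Time, podEvictionTimeout time.Duration) bool {
	return decisionTimestamp.After(saved.Add(podEvictionTimeout))
}

func main() {
	h := nodeHealth{readyTransitionTimestamp: time.Now().Add(-6 * time.Minute)}
	fmt.Println(shouldEvict(time.Now(), h.readyTransitionTimestamp, 5*time.Minute)) // true
}
```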
@@ -799,9 +802,9 @@ func (nc *Controller) monitorNodeStatus() error {
return nil
}

// tryUpdateNodeStatus checks a given node's conditions and tries to update it. Returns grace period to
// tryUpdateNodeHealth checks a given node's conditions and tries to update it. Returns grace period to
// which given node is entitled, state of current and last observed Ready Condition, and an error if it occurred.
func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.NodeCondition, *v1.NodeCondition, error) {
func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.NodeCondition, *v1.NodeCondition, error) {
var err error
var gracePeriod time.Duration
var observedReadyCondition v1.NodeCondition
@@ -817,8 +820,8 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
LastTransitionTime: node.CreationTimestamp,
}
gracePeriod = nc.nodeStartupGracePeriod
nc.nodeStatusMap[node.Name] = nodeStatusData{
status: node.Status,
nc.nodeHealthMap[node.Name] = &nodeHealthData{
status: &node.Status,
probeTimestamp: node.CreationTimestamp,
readyTransitionTimestamp: node.CreationTimestamp,
}
@@ -828,7 +831,7 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
gracePeriod = nc.nodeMonitorGracePeriod
}

savedNodeStatus, found := nc.nodeStatusMap[node.Name]
savedNodeHealth, found := nc.nodeHealthMap[node.Name]
// There are following cases to check:
// - both saved and new status have no Ready Condition set - we leave everything as it is,
// - saved status have no Ready Condition, but current one does - Controller was restarted with Node data already present in etcd,
@@ -845,28 +848,28 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
// if that's the case, but it does not seem necessary.
var savedCondition *v1.NodeCondition
if found {
_, savedCondition = v1node.GetNodeCondition(&savedNodeStatus.status, v1.NodeReady)
_, savedCondition = v1node.GetNodeCondition(savedNodeHealth.status, v1.NodeReady)
}
_, observedCondition := v1node.GetNodeCondition(&node.Status, v1.NodeReady)
if !found {
glog.Warningf("Missing timestamp for Node %s. Assuming now as a timestamp.", node.Name)
savedNodeStatus = nodeStatusData{
status: node.Status,
savedNodeHealth = &nodeHealthData{
status: &node.Status,
probeTimestamp: nc.now(),
readyTransitionTimestamp: nc.now(),
}
} else if savedCondition == nil && observedCondition != nil {
glog.V(1).Infof("Creating timestamp entry for newly observed Node %s", node.Name)
savedNodeStatus = nodeStatusData{
status: node.Status,
savedNodeHealth = &nodeHealthData{
status: &node.Status,
probeTimestamp: nc.now(),
readyTransitionTimestamp: nc.now(),
}
} else if savedCondition != nil && observedCondition == nil {
glog.Errorf("ReadyCondition was removed from Status of Node %s", node.Name)
// TODO: figure out what to do in this case. For now we do the same thing as above.
savedNodeStatus = nodeStatusData{
status: node.Status,
savedNodeHealth = &nodeHealthData{
status: &node.Status,
probeTimestamp: nc.now(),
readyTransitionTimestamp: nc.now(),
}
@@ -878,22 +881,22 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
glog.V(3).Infof("ReadyCondition for Node %s transitioned from %v to %v", node.Name, savedCondition, observedCondition)
transitionTime = nc.now()
} else {
transitionTime = savedNodeStatus.readyTransitionTimestamp
transitionTime = savedNodeHealth.readyTransitionTimestamp
}
if glog.V(5) {
glog.V(5).Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, savedNodeStatus.status, node.Status)
glog.V(5).Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, savedNodeHealth.status, node.Status)
} else {
glog.V(3).Infof("Node %s ReadyCondition updated. Updating timestamp.", node.Name)
}
savedNodeStatus = nodeStatusData{
status: node.Status,
savedNodeHealth = &nodeHealthData{
status: &node.Status,
probeTimestamp: nc.now(),
readyTransitionTimestamp: transitionTime,
}
}
nc.nodeStatusMap[node.Name] = savedNodeStatus
nc.nodeHealthMap[node.Name] = savedNodeHealth

if nc.now().After(savedNodeStatus.probeTimestamp.Add(gracePeriod)) {
if nc.now().After(savedNodeHealth.probeTimestamp.Add(gracePeriod)) {
// NodeReady condition was last set longer ago than gracePeriod, so update it to Unknown
// (regardless of its current value) in the master.
if currentReadyCondition == nil {
@@ -908,7 +911,7 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
})
} else {
glog.V(4).Infof("node %v hasn't been updated for %+v. Last ready condition is: %+v",
node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), observedReadyCondition)
node.Name, nc.now().Time.Sub(savedNodeHealth.probeTimestamp.Time), observedReadyCondition)
if observedReadyCondition.Status != v1.ConditionUnknown {
currentReadyCondition.Status = v1.ConditionUnknown
currentReadyCondition.Reason = "NodeStatusUnknown"
@@ -944,7 +947,7 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
})
} else {
glog.V(4).Infof("node %v hasn't been updated for %+v. Last %v is: %+v",
node.Name, nc.now().Time.Sub(savedNodeStatus.probeTimestamp.Time), nodeConditionType, currentCondition)
node.Name, nc.now().Time.Sub(savedNodeHealth.probeTimestamp.Time), nodeConditionType, currentCondition)
if currentCondition.Status != v1.ConditionUnknown {
currentCondition.Status = v1.ConditionUnknown
currentCondition.Reason = "NodeStatusUnknown"
@@ -960,9 +963,9 @@ func (nc *Controller) tryUpdateNodeStatus(node *v1.Node) (time.Duration, v1.Node
glog.Errorf("Error updating node %s: %v", node.Name, err)
return gracePeriod, observedReadyCondition, currentReadyCondition, err
}
nc.nodeStatusMap[node.Name] = nodeStatusData{
status: node.Status,
probeTimestamp: nc.nodeStatusMap[node.Name].probeTimestamp,
nc.nodeHealthMap[node.Name] = &nodeHealthData{
status: &node.Status,
probeTimestamp: nc.nodeHealthMap[node.Name].probeTimestamp,
readyTransitionTimestamp: nc.now(),
}
return gracePeriod, observedReadyCondition, currentReadyCondition, nil
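
The staleness handling in tryUpdateNodeHealth above boils down to one comparison: if the saved probe timestamp is older than the applicable grace period, the Ready condition is forced to Unknown. A minimal sketch of just that timing logic; the names and the string-based condition are illustrative, while the real code builds a full v1.NodeCondition with reason, message, and timestamps.

```go
package main

import (
	"fmt"
	"time"
)

// markUnknownIfStale sketches only the timing logic of the staleness check above: if
// the saved probe timestamp is older than the applicable grace period, the Ready
// condition is forced to "Unknown". A plain string stands in for v1.NodeCondition here.
func markUnknownIfStale(now, lastProbe time.Time, gracePeriod time.Duration, ready string) string {
	if now.After(lastProbe.Add(gracePeriod)) && ready != "Unknown" {
		return "Unknown"
	}
	return ready
}

func main() {
	lastProbe := time.Now().Add(-90 * time.Second)
	fmt.Println(markUnknownIfStale(time.Now(), lastProbe, 40*time.Second, "True")) // Unknown
}
```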
@@ -1044,10 +1047,10 @@ func (nc *Controller) handleDisruption(zoneToNodeConditions map[string][]*v1.Nod
// When exiting disruption mode update probe timestamps on all Nodes.
now := nc.now()
for i := range nodes {
v := nc.nodeStatusMap[nodes[i].Name]
v := nc.nodeHealthMap[nodes[i].Name]
v.probeTimestamp = now
v.readyTransitionTimestamp = now
nc.nodeStatusMap[nodes[i].Name] = v
nc.nodeHealthMap[nodes[i].Name] = v
}
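
One consequence of switching nodeHealthMap to pointer values is visible in the loop above: mutating the looked-up entry already updates the map, so the write-back is redundant for entries that already exist (though harmless). A small sketch demonstrating the pointer-map semantics, with an illustrative struct in place of the real one:

```go
package main

import (
	"fmt"
	"time"
)

// Illustrative stand-in for the controller's nodeHealthData.
type nodeHealthData struct {
	probeTimestamp           time.Time
	readyTransitionTimestamp time.Time
}

func main() {
	healthMap := map[string]*nodeHealthData{
		"node-1": {probeTimestamp: time.Now().Add(-10 * time.Minute)},
	}
	now := time.Now()
	for name := range healthMap {
		v := healthMap[name]
		v.probeTimestamp = now // mutates the map entry directly through the pointer
		v.readyTransitionTimestamp = now
		healthMap[name] = v // redundant for existing entries, kept to mirror the diff
	}
	fmt.Println(healthMap["node-1"].probeTimestamp.Equal(now)) // true
}
```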
// We reset all rate limiters to settings appropriate for the given state.
for k := range nc.zoneStates {