Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Removing unnecessary code from node lifecycle controller #81416

Merged
merged 1 commit into from
Aug 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
67 changes: 22 additions & 45 deletions pkg/controller/nodelifecycle/node_lifecycle_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -595,13 +595,14 @@ func (nc *Controller) doNoExecuteTaintingPass() {
// Because we want to mimic NodeStatus.Condition["Ready"] we make "unreachable" and "not ready" taints mutually exclusive.
taintToAdd := v1.Taint{}
oppositeTaint := v1.Taint{}
if condition.Status == v1.ConditionFalse {
switch condition.Status {
case v1.ConditionFalse:
taintToAdd = *NotReadyTaintTemplate
oppositeTaint = *UnreachableTaintTemplate
} else if condition.Status == v1.ConditionUnknown {
case v1.ConditionUnknown:
taintToAdd = *UnreachableTaintTemplate
oppositeTaint = *NotReadyTaintTemplate
} else {
default:
// It seems that the Node is ready again, so there's no need to taint it.
klog.V(4).Infof("Node %v was in a taint queue, but it's ready now. Ignoring taint request.", value.Value)
return true, 0
Expand Down Expand Up @@ -720,7 +721,8 @@ func (nc *Controller) monitorNodeHealth() error {
decisionTimestamp := nc.now()
if currentReadyCondition != nil {
// Check eviction timeout against decisionTimestamp
if observedReadyCondition.Status == v1.ConditionFalse {
switch observedReadyCondition.Status {
case v1.ConditionFalse:
if nc.useTaintBasedEvictions {
// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
if taintutils.TaintExists(node.Spec.Taints, UnreachableTaintTemplate) {
Expand All @@ -746,8 +748,7 @@ func (nc *Controller) monitorNodeHealth() error {
}
}
}
}
if observedReadyCondition.Status == v1.ConditionUnknown {
case v1.ConditionUnknown:
if nc.useTaintBasedEvictions {
// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
if taintutils.TaintExists(node.Spec.Taints, NotReadyTaintTemplate) {
Expand All @@ -773,8 +774,7 @@ func (nc *Controller) monitorNodeHealth() error {
}
}
}
}
if observedReadyCondition.Status == v1.ConditionTrue {
case v1.ConditionTrue:
if nc.useTaintBasedEvictions {
removed, err := nc.markNodeAsReachable(node)
if err != nil {
Expand Down Expand Up @@ -807,7 +807,6 @@ func (nc *Controller) monitorNodeHealth() error {
// tryUpdateNodeHealth checks a given node's conditions and tries to update it. Returns grace period to
// which given node is entitled, state of current and last observed Ready Condition, and an error if it occurred.
func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.NodeCondition, *v1.NodeCondition, error) {
var err error
var gracePeriod time.Duration
var observedReadyCondition v1.NodeCondition
_, currentReadyCondition := nodeutil.GetNodeCondition(&node.Status, v1.NodeReady)
Expand Down Expand Up @@ -836,7 +835,6 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node
observedReadyCondition = *currentReadyCondition
gracePeriod = nc.nodeMonitorGracePeriod
}

savedNodeHealth, found := nc.nodeHealthMap[node.Name]
// There are following cases to check:
// - both saved and new status have no Ready Condition set - we leave everything as it is,
Expand All @@ -858,35 +856,35 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node
_, savedCondition = nodeutil.GetNodeCondition(savedNodeHealth.status, v1.NodeReady)
savedLease = savedNodeHealth.lease
}
_, observedCondition := nodeutil.GetNodeCondition(&node.Status, v1.NodeReady)

if !found {
klog.Warningf("Missing timestamp for Node %s. Assuming now as a timestamp.", node.Name)
savedNodeHealth = &nodeHealthData{
status: &node.Status,
probeTimestamp: nc.now(),
readyTransitionTimestamp: nc.now(),
}
} else if savedCondition == nil && observedCondition != nil {
} else if savedCondition == nil && currentReadyCondition != nil {
klog.V(1).Infof("Creating timestamp entry for newly observed Node %s", node.Name)
savedNodeHealth = &nodeHealthData{
status: &node.Status,
probeTimestamp: nc.now(),
readyTransitionTimestamp: nc.now(),
}
} else if savedCondition != nil && observedCondition == nil {
} else if savedCondition != nil && currentReadyCondition == nil {
klog.Errorf("ReadyCondition was removed from Status of Node %s", node.Name)
// TODO: figure out what to do in this case. For now we do the same thing as above.
savedNodeHealth = &nodeHealthData{
status: &node.Status,
probeTimestamp: nc.now(),
readyTransitionTimestamp: nc.now(),
}
} else if savedCondition != nil && observedCondition != nil && savedCondition.LastHeartbeatTime != observedCondition.LastHeartbeatTime {
} else if savedCondition != nil && currentReadyCondition != nil && savedCondition.LastHeartbeatTime != currentReadyCondition.LastHeartbeatTime {
var transitionTime metav1.Time
// If ReadyCondition changed since the last time we checked, we update the transition timestamp to "now",
// otherwise we leave it as it is.
if savedCondition.LastTransitionTime != observedCondition.LastTransitionTime {
klog.V(3).Infof("ReadyCondition for Node %s transitioned from %v to %v", node.Name, savedCondition, observedCondition)
if savedCondition.LastTransitionTime != currentReadyCondition.LastTransitionTime {
klog.V(3).Infof("ReadyCondition for Node %s transitioned from %v to %v", node.Name, savedCondition, currentReadyCondition)
transitionTime = nc.now()
} else {
transitionTime = savedNodeHealth.readyTransitionTimestamp
Expand Down Expand Up @@ -919,31 +917,9 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node
if nc.now().After(savedNodeHealth.probeTimestamp.Add(gracePeriod)) {
// NodeReady condition or lease was last set longer ago than gracePeriod, so
// update it to Unknown (regardless of its current value) in the master.
if currentReadyCondition == nil {
klog.V(2).Infof("node %v is never updated by kubelet", node.Name)
node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
Reason: "NodeStatusNeverUpdated",
Message: fmt.Sprintf("Kubelet never posted node status."),
LastHeartbeatTime: node.CreationTimestamp,
LastTransitionTime: nc.now(),
})
} else {
klog.V(4).Infof("node %v hasn't been updated for %+v. Last ready condition is: %+v",
node.Name, nc.now().Time.Sub(savedNodeHealth.probeTimestamp.Time), observedReadyCondition)
if observedReadyCondition.Status != v1.ConditionUnknown {
currentReadyCondition.Status = v1.ConditionUnknown
currentReadyCondition.Reason = "NodeStatusUnknown"
currentReadyCondition.Message = "Kubelet stopped posting node status."
// LastProbeTime is the last time we heard from kubelet.
currentReadyCondition.LastHeartbeatTime = observedReadyCondition.LastHeartbeatTime
currentReadyCondition.LastTransitionTime = nc.now()
}
}

// remaining node conditions should also be set to Unknown
remainingNodeConditionTypes := []v1.NodeConditionType{
nodeConditionTypes := []v1.NodeConditionType{
v1.NodeReady,
v1.NodeMemoryPressure,
v1.NodeDiskPressure,
v1.NodePIDPressure,
Expand All @@ -952,7 +928,7 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node
}

nowTimestamp := nc.now()
for _, nodeConditionType := range remainingNodeConditionTypes {
for _, nodeConditionType := range nodeConditionTypes {
_, currentCondition := nodeutil.GetNodeCondition(&node.Status, nodeConditionType)
if currentCondition == nil {
klog.V(2).Infof("Condition %v of node %v was never updated by kubelet", nodeConditionType, node.Name)
Expand All @@ -975,10 +951,11 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node
}
}
}
// We need to update currentReadyCondition due to its value potentially changed.
_, currentReadyCondition = nodeutil.GetNodeCondition(&node.Status, v1.NodeReady)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If removing if didn't cause any tests to fail, it seems quite clear that some unit test is missing - please add test case to excercise this case.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added the tests that checks if returned currentReadyCondition is the same as one calculated based on the stored node status. IMHO it should cover all of the cases: all statuses, nil status and status changed to unknown.


_, currentCondition := nodeutil.GetNodeCondition(&node.Status, v1.NodeReady)
if !apiequality.Semantic.DeepEqual(currentCondition, &observedReadyCondition) {
if _, err = nc.kubeClient.CoreV1().Nodes().UpdateStatus(node); err != nil {
if !apiequality.Semantic.DeepEqual(currentReadyCondition, &observedReadyCondition) {
if _, err := nc.kubeClient.CoreV1().Nodes().UpdateStatus(node); err != nil {
klog.Errorf("Error updating node %s: %v", node.Name, err)
return gracePeriod, observedReadyCondition, currentReadyCondition, err
}
Expand All @@ -992,7 +969,7 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node
}
}

return gracePeriod, observedReadyCondition, currentReadyCondition, err
return gracePeriod, observedReadyCondition, currentReadyCondition, nil
}

func (nc *Controller) handleDisruption(zoneToNodeConditions map[string][]*v1.NodeCondition, nodes []*v1.Node) {
Expand Down
213 changes: 213 additions & 0 deletions pkg/controller/nodelifecycle/node_lifecycle_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3057,3 +3057,216 @@ func TestReconcileNodeLabels(t *testing.T) {
}
}
}

func TestTryUpdateNodeHealth(t *testing.T) {
fakeNow := metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC)
fakeOld := metav1.Date(2016, 1, 1, 12, 0, 0, 0, time.UTC)
evictionTimeout := 10 * time.Minute

fakeNodeHandler := &testutil.FakeNodeHandler{
Existing: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: fakeNow,
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
LastHeartbeatTime: fakeNow,
LastTransitionTime: fakeNow,
},
},
},
},
},
Clientset: fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testutil.NewPod("pod0", "node0")}}),
}

nodeController, _ := newNodeLifecycleControllerFromClient(
fakeNodeHandler,
evictionTimeout,
testRateLimiterQPS,
testRateLimiterQPS,
testLargeClusterThreshold,
testUnhealthyThreshold,
testNodeMonitorGracePeriod,
testNodeStartupGracePeriod,
testNodeMonitorPeriod,
true)
nodeController.now = func() metav1.Time { return fakeNow }
nodeController.recorder = testutil.NewFakeRecorder()

getStatus := func(cond *v1.NodeCondition) *v1.ConditionStatus {
if cond == nil {
return nil
}
return &cond.Status
}

tests := []struct {
name string
node *v1.Node
}{
{
name: "Status true",
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: fakeNow,
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
LastHeartbeatTime: fakeNow,
LastTransitionTime: fakeNow,
},
},
},
},
},
{
name: "Status false",
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: fakeNow,
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
LastHeartbeatTime: fakeNow,
LastTransitionTime: fakeNow,
},
},
},
},
},
{
name: "Status unknown",
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: fakeNow,
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: fakeNow,
LastTransitionTime: fakeNow,
},
},
},
},
},
{
name: "Status nil",
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: fakeNow,
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{},
},
},
},
{
name: "Status true - after grace period",
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: fakeOld,
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
LastHeartbeatTime: fakeOld,
LastTransitionTime: fakeOld,
},
},
},
},
},
{
name: "Status false - after grace period",
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: fakeOld,
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
LastHeartbeatTime: fakeOld,
LastTransitionTime: fakeOld,
},
},
},
},
},
{
name: "Status unknown - after grace period",
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: fakeOld,
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionUnknown,
LastHeartbeatTime: fakeOld,
LastTransitionTime: fakeOld,
},
},
},
},
},
{
name: "Status nil - after grace period",
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node0",
CreationTimestamp: fakeOld,
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{},
},
},
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
nodeController.nodeHealthMap[test.node.Name] = &nodeHealthData{
status: &test.node.Status,
probeTimestamp: test.node.CreationTimestamp,
readyTransitionTimestamp: test.node.CreationTimestamp,
}
_, _, currentReadyCondition, err := nodeController.tryUpdateNodeHealth(test.node)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
_, savedReadyCondition := nodeutil.GetNodeCondition(nodeController.nodeHealthMap[test.node.Name].status, v1.NodeReady)
savedStatus := getStatus(savedReadyCondition)
currentStatus := getStatus(currentReadyCondition)
if currentStatus != savedStatus {
t.Errorf("expected %v, got %v", savedStatus, currentStatus)
}
})
}
}