Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a NodeCondition "NetworkUnavailable" to prevent scheduling onto a node until the routes have been created #26415

Merged
merged 2 commits into from
May 29, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
21 changes: 19 additions & 2 deletions pkg/api/resource_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,25 @@ func GetPodReadyCondition(status PodStatus) *PodCondition {
// GetPodCondition extracts the provided condition from the given status and returns that.
// Returns nil and -1 if the condition is not present, and the the index of the located condition.
func GetPodCondition(status *PodStatus, conditionType PodConditionType) (int, *PodCondition) {
for i, c := range status.Conditions {
if c.Type == conditionType {
if status == nil {
return -1, nil
}
for i := range status.Conditions {
if status.Conditions[i].Type == conditionType {
return i, &status.Conditions[i]
}
}
return -1, nil
}

// GetNodeCondition extracts the provided condition from the given status and returns that.
// Returns nil and -1 if the condition is not present, and the the index of the located condition.
func GetNodeCondition(status *NodeStatus, conditionType NodeConditionType) (int, *NodeCondition) {
if status == nil {
return -1, nil
}
for i := range status.Conditions {
if status.Conditions[i].Type == conditionType {
return i, &status.Conditions[i]
}
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/api/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -2009,6 +2009,8 @@ const (
NodeOutOfDisk NodeConditionType = "OutOfDisk"
// NodeMemoryPressure means the kubelet is under pressure due to insufficient available memory.
NodeMemoryPressure NodeConditionType = "MemoryPressure"
// NodeNetworkUnavailable means that network for the node is not correctly configured.
NodeNetworkUnavailable NodeConditionType = "NetworkUnavailable"
)

type NodeCondition struct {
Expand Down
2 changes: 2 additions & 0 deletions pkg/api/v1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -2412,6 +2412,8 @@ const (
NodeOutOfDisk NodeConditionType = "OutOfDisk"
// NodeMemoryPressure means the kubelet is under pressure due to insufficient available memory.
NodeMemoryPressure NodeConditionType = "MemoryPressure"
// NodeNetworkUnavailable means that network for the node is not correctly configured.
NodeNetworkUnavailable NodeConditionType = "NetworkUnavailable"
)

// NodeCondition contains condition infromation for a node.
Expand Down
25 changes: 6 additions & 19 deletions pkg/controller/node/nodecontroller.go
Original file line number Diff line number Diff line change
Expand Up @@ -412,20 +412,6 @@ func (nc *NodeController) recycleCIDR(obj interface{}) {
}
}

// getCondition returns a condition object for the specific condition
// type, nil if the condition is not set.
func (nc *NodeController) getCondition(status *api.NodeStatus, conditionType api.NodeConditionType) *api.NodeCondition {
if status == nil {
return nil
}
for i := range status.Conditions {
if status.Conditions[i].Type == conditionType {
return &status.Conditions[i]
}
}
return nil
}

var gracefulDeletionVersion = version.MustParse("v1.1.0")

// maybeDeleteTerminatingPod non-gracefully deletes pods that are terminating
Expand Down Expand Up @@ -711,7 +697,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
var err error
var gracePeriod time.Duration
var observedReadyCondition api.NodeCondition
currentReadyCondition := nc.getCondition(&node.Status, api.NodeReady)
_, currentReadyCondition := api.GetNodeCondition(&node.Status, api.NodeReady)
if currentReadyCondition == nil {
// If ready condition is nil, then kubelet (or nodecontroller) never posted node status.
// A fake ready condition is created, where LastProbeTime and LastTransitionTime is set
Expand Down Expand Up @@ -751,9 +737,9 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
// if that's the case, but it does not seem necessary.
var savedCondition *api.NodeCondition
if found {
savedCondition = nc.getCondition(&savedNodeStatus.status, api.NodeReady)
_, savedCondition = api.GetNodeCondition(&savedNodeStatus.status, api.NodeReady)
}
observedCondition := nc.getCondition(&node.Status, api.NodeReady)
_, observedCondition := api.GetNodeCondition(&node.Status, api.NodeReady)
if !found {
glog.Warningf("Missing timestamp for Node %s. Assuming now as a timestamp.", node.Name)
savedNodeStatus = nodeStatusData{
Expand Down Expand Up @@ -829,7 +815,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
// Like NodeReady condition, NodeOutOfDisk was last set longer ago than gracePeriod, so update
// it to Unknown (regardless of its current value) in the master.
// TODO(madhusudancs): Refactor this with readyCondition to remove duplicated code.
oodCondition := nc.getCondition(&node.Status, api.NodeOutOfDisk)
_, oodCondition := api.GetNodeCondition(&node.Status, api.NodeOutOfDisk)
if oodCondition == nil {
glog.V(2).Infof("Out of disk condition of node %v is never updated by kubelet", node.Name)
node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
Expand All @@ -851,7 +837,8 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
}
}

if !api.Semantic.DeepEqual(nc.getCondition(&node.Status, api.NodeReady), &observedReadyCondition) {
_, currentCondition := api.GetNodeCondition(&node.Status, api.NodeReady)
if !api.Semantic.DeepEqual(currentCondition, &observedReadyCondition) {
if _, err = nc.kubeClient.Core().Nodes().UpdateStatus(node); err != nil {
glog.Errorf("Error updating node %s: %v", node.Name, err)
return gracePeriod, observedReadyCondition, currentReadyCondition, err
Expand Down
66 changes: 66 additions & 0 deletions pkg/controller/route/routecontroller.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (

"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/unversioned"
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
"k8s.io/kubernetes/pkg/cloudprovider"
"k8s.io/kubernetes/pkg/util/metrics"
Expand All @@ -36,6 +37,8 @@ const (
maxConcurrentRouteCreations int = 200
// Maximum number of retries of route creations.
maxRetries int = 5
// Maximum number of retries of node status update.
updateNodeStatusMaxRetries int = 3
)

type RouteController struct {
Expand Down Expand Up @@ -121,6 +124,8 @@ func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.R
rateLimiter <- struct{}{}
err := rc.routes.CreateRoute(rc.clusterName, nameHint, route)
<-rateLimiter

rc.updateNetworkingCondition(nodeName, err == nil)
if err != nil {
glog.Errorf("Could not create route %s %s for node %s after %v: %v", nameHint, route.DestinationCIDR, nodeName, time.Now().Sub(startTime), err)
} else {
Expand All @@ -129,6 +134,8 @@ func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.R
}
}
}(node.Name, nameHint, route)
} else {
rc.updateNetworkingCondition(node.Name, true)
}
nodeCIDRs[node.Name] = node.Spec.PodCIDR
}
Expand All @@ -155,6 +162,65 @@ func (rc *RouteController) reconcile(nodes []api.Node, routes []*cloudprovider.R
return nil
}

func updateNetworkingCondition(node *api.Node, routeCreated bool) {
_, networkingCondition := api.GetNodeCondition(&node.Status, api.NodeNetworkUnavailable)
currentTime := unversioned.Now()
if routeCreated {
if networkingCondition != nil && networkingCondition.Status != api.ConditionFalse {
networkingCondition.Status = api.ConditionFalse
networkingCondition.Reason = "RouteCreated"
networkingCondition.Message = "RouteController created a route"
networkingCondition.LastTransitionTime = currentTime
} else if networkingCondition == nil {
node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
Type: api.NodeNetworkUnavailable,
Status: api.ConditionFalse,
Reason: "RouteCreated",
Message: "RouteController created a route",
LastTransitionTime: currentTime,
})
}
} else {
if networkingCondition != nil && networkingCondition.Status != api.ConditionTrue {
networkingCondition.Status = api.ConditionTrue
networkingCondition.Reason = "NoRouteCreated"
networkingCondition.Message = "RouteController failed to create a route"
networkingCondition.LastTransitionTime = currentTime
} else if networkingCondition == nil {
node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
Type: api.NodeNetworkUnavailable,
Status: api.ConditionTrue,
Reason: "NoRouteCreated",
Message: "RouteController failed to create a route",
LastTransitionTime: currentTime,
})
}
}
}

func (rc *RouteController) updateNetworkingCondition(nodeName string, routeCreated bool) error {
var err error
for i := 0; i < updateNodeStatusMaxRetries; i++ {
node, err := rc.kubeClient.Core().Nodes().Get(nodeName)
if err != nil {
glog.Errorf("Error geting node: %v", err)
continue
}
updateNetworkingCondition(node, routeCreated)
// TODO: Use Patch instead once #26381 is merged.
// See kubernetes/node-problem-detector#9 for details.
if _, err = rc.kubeClient.Core().Nodes().UpdateStatus(node); err == nil {
return nil
}
if i+1 < updateNodeStatusMaxRetries {
glog.Errorf("Error updating node %s, retrying: %v", node.Name, err)
} else {
glog.Errorf("Error updating node %s: %v", node.Name, err)
}
}
return err
}

func (rc *RouteController) isResponsibleForRoute(route *cloudprovider.Route) bool {
_, cidr, err := net.ParseCIDR(route.DestinationCIDR)
if err != nil {
Expand Down
79 changes: 63 additions & 16 deletions pkg/controller/route/routecontroller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
"time"

"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/fake"
"k8s.io/kubernetes/pkg/client/testing/core"
"k8s.io/kubernetes/pkg/cloudprovider"
fakecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/fake"
)
Expand Down Expand Up @@ -67,16 +69,22 @@ func TestIsResponsibleForRoute(t *testing.T) {

func TestReconcile(t *testing.T) {
cluster := "my-k8s"
node1 := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}}
node2 := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}}
nodeNoCidr := api.Node{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: ""}}

testCases := []struct {
nodes []api.Node
initialRoutes []*cloudprovider.Route
expectedRoutes []*cloudprovider.Route
nodes []api.Node
initialRoutes []*cloudprovider.Route
expectedRoutes []*cloudprovider.Route
expectedNetworkUnavailable []bool
clientset *fake.Clientset
}{
// 2 nodes, routes already there
{
nodes: []api.Node{
{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
node1,
node2,
},
initialRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
Expand All @@ -86,12 +94,14 @@ func TestReconcile(t *testing.T) {
{cluster + "-01", "node-1", "10.120.0.0/24"},
{cluster + "-02", "node-2", "10.120.1.0/24"},
},
expectedNetworkUnavailable: []bool{true, true},
clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
},
// 2 nodes, one route already there
{
nodes: []api.Node{
{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
node1,
node2,
},
initialRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
Expand All @@ -100,24 +110,28 @@ func TestReconcile(t *testing.T) {
{cluster + "-01", "node-1", "10.120.0.0/24"},
{cluster + "-02", "node-2", "10.120.1.0/24"},
},
expectedNetworkUnavailable: []bool{true, true},
clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
},
// 2 nodes, no routes yet
{
nodes: []api.Node{
{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
node1,
node2,
},
initialRoutes: []*cloudprovider.Route{},
expectedRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
{cluster + "-02", "node-2", "10.120.1.0/24"},
},
expectedNetworkUnavailable: []bool{true, true},
clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
},
// 2 nodes, a few too many routes
{
nodes: []api.Node{
{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
node1,
node2,
},
initialRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
Expand All @@ -129,12 +143,14 @@ func TestReconcile(t *testing.T) {
{cluster + "-01", "node-1", "10.120.0.0/24"},
{cluster + "-02", "node-2", "10.120.1.0/24"},
},
expectedNetworkUnavailable: []bool{true, true},
clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
},
// 2 nodes, 2 routes, but only 1 is right
{
nodes: []api.Node{
{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: "10.120.1.0/24"}},
node1,
node2,
},
initialRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
Expand All @@ -144,17 +160,21 @@ func TestReconcile(t *testing.T) {
{cluster + "-01", "node-1", "10.120.0.0/24"},
{cluster + "-02", "node-2", "10.120.1.0/24"},
},
expectedNetworkUnavailable: []bool{true, true},
clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, node2}}),
},
// 2 nodes, one node without CIDR assigned.
{
nodes: []api.Node{
{ObjectMeta: api.ObjectMeta{Name: "node-1", UID: "01"}, Spec: api.NodeSpec{PodCIDR: "10.120.0.0/24"}},
{ObjectMeta: api.ObjectMeta{Name: "node-2", UID: "02"}, Spec: api.NodeSpec{PodCIDR: ""}},
node1,
nodeNoCidr,
},
initialRoutes: []*cloudprovider.Route{},
expectedRoutes: []*cloudprovider.Route{
{cluster + "-01", "node-1", "10.120.0.0/24"},
},
expectedNetworkUnavailable: []bool{true, false},
clientset: fake.NewSimpleClientset(&api.NodeList{Items: []api.Node{node1, nodeNoCidr}}),
},
}
for i, testCase := range testCases {
Expand All @@ -170,10 +190,37 @@ func TestReconcile(t *testing.T) {
t.Error("Error in test: fakecloud doesn't support Routes()")
}
_, cidr, _ := net.ParseCIDR("10.120.0.0/16")
rc := New(routes, nil, cluster, cidr)
rc := New(routes, testCase.clientset, cluster, cidr)
if err := rc.reconcile(testCase.nodes, testCase.initialRoutes); err != nil {
t.Errorf("%d. Error from rc.reconcile(): %v", i, err)
}
for _, action := range testCase.clientset.Actions() {
if action.GetVerb() == "update" && action.GetResource().Resource == "nodes" {
node := action.(core.UpdateAction).GetObject().(*api.Node)
_, condition := api.GetNodeCondition(&node.Status, api.NodeNetworkUnavailable)
if condition == nil {
t.Errorf("%d. Missing NodeNetworkUnavailable condition for Node %v", i, node.Name)
} else {
check := func(index int) bool {
return (condition.Status == api.ConditionFalse) == testCase.expectedNetworkUnavailable[index]
}
index := -1
for j := range testCase.nodes {
if testCase.nodes[j].Name == node.Name {
index = j
}
}
if index == -1 {
// Something's wrong
continue
}
if !check(index) {
t.Errorf("%d. Invalid NodeNetworkUnavailable condition for Node %v, expected %v, got %v",
i, node.Name, testCase.expectedNetworkUnavailable[index], (condition.Status == api.ConditionFalse))
}
}
}
}
var finalRoutes []*cloudprovider.Route
var err error
timeoutChan := time.After(200 * time.Millisecond)
Expand Down