Skip to content

Commit

Permalink
Merge pull request #49821 from MrHohn/routecontroller-error-event
Browse files Browse the repository at this point in the history
Automatic merge from submit-queue (batch tested with PRs 46519, 49794, 49720, 49692, 49821)

[route_controller] Emit an event when route creation fails

**What this PR does / why we need it**:
When the route controller fails to create a route for a node, pod networking on that node is likely broken. We already surface this situation through a node condition (https://github.com/kubernetes/kubernetes/blob/v1.7.2/pkg/controller/route/routecontroller.go#L197-L231), but that can be hard to track when a user has a large cluster with many nodes. Surfacing the error through an event as well should make the failure more obvious.

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #

**Special notes for your reviewer**:
cc @bowei 

**Release note**:

```release-note
NONE
```
  • Loading branch information
Kubernetes Submit Queue committed Aug 1, 2017
2 parents 6f992c9 + 28d2e67 commit 665db3b
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 1 deletion.
3 changes: 3 additions & 0 deletions pkg/controller/route/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,11 @@ go_library(
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/client-go/informers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/scheme:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/typed/core/v1:go_default_library",
"//vendor/k8s.io/client-go/listers/core/v1:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",
"//vendor/k8s.io/client-go/tools/record:go_default_library",
],
)

Expand Down
27 changes: 26 additions & 1 deletion pkg/controller/route/routecontroller.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,11 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
coreinformers "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
v1node "k8s.io/kubernetes/pkg/api/v1/node"
"k8s.io/kubernetes/pkg/cloudprovider"
"k8s.io/kubernetes/pkg/controller"
Expand All @@ -58,6 +61,8 @@ type RouteController struct {
clusterCIDR *net.IPNet
nodeLister corelisters.NodeLister
nodeListerSynced cache.InformerSynced
broadcaster record.EventBroadcaster
recorder record.EventRecorder
}

func New(routes cloudprovider.Routes, kubeClient clientset.Interface, nodeInformer coreinformers.NodeInformer, clusterName string, clusterCIDR *net.IPNet) *RouteController {
Expand All @@ -69,13 +74,18 @@ func New(routes cloudprovider.Routes, kubeClient clientset.Interface, nodeInform
glog.Fatal("RouteController: Must specify clusterCIDR.")
}

eventBroadcaster := record.NewBroadcaster()
recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "route_controller"})

rc := &RouteController{
routes: routes,
kubeClient: kubeClient,
clusterName: clusterName,
clusterCIDR: clusterCIDR,
nodeLister: nodeInformer.Lister(),
nodeListerSynced: nodeInformer.Informer().HasSynced,
broadcaster: eventBroadcaster,
recorder: recorder,
}

return rc
Expand All @@ -91,6 +101,10 @@ func (rc *RouteController) Run(stopCh <-chan struct{}, syncPeriod time.Duration)
return
}

if rc.broadcaster != nil {
rc.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(rc.kubeClient.Core().RESTClient()).Events("")})
}

// TODO: If we do just the full Resync every 5 minutes (default value)
// that means that we may wait up to 5 minutes before even starting
// creating a route for it. This is bad.
Expand Down Expand Up @@ -160,7 +174,18 @@ func (rc *RouteController) reconcile(nodes []*v1.Node, routes []*cloudprovider.R

rc.updateNetworkingCondition(nodeName, err == nil)
if err != nil {
glog.Errorf("Could not create route %s %s for node %s after %v: %v", nameHint, route.DestinationCIDR, nodeName, time.Now().Sub(startTime), err)
msg := fmt.Sprintf("Could not create route %s %s for node %s after %v: %v", nameHint, route.DestinationCIDR, nodeName, time.Now().Sub(startTime), err)
if rc.recorder != nil {
rc.recorder.Eventf(
&v1.ObjectReference{
Kind: "Node",
Name: string(nodeName),
UID: types.UID(nodeName),
Namespace: "",
}, v1.EventTypeWarning, "FailedToCreateRoute", msg)
}
glog.Error(msg)

} else {
glog.Infof("Created route for node %s %s with hint %s after %v", nodeName, route.DestinationCIDR, nameHint, time.Now().Sub(startTime))
return
Expand Down

0 comments on commit 665db3b

Please sign in to comment.