Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make it possible to allow discovery errors for controllers #49495

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/kube-controller-manager/app/BUILD
Expand Up @@ -111,6 +111,7 @@ go_library(
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/serializer:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/apiserver/pkg/server/healthz:go_default_library",
Expand Down
6 changes: 5 additions & 1 deletion cmd/kube-controller-manager/app/controllermanager.go
Expand Up @@ -33,6 +33,7 @@ import (
"time"

"k8s.io/apimachinery/pkg/runtime/schema"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"

Expand Down Expand Up @@ -366,7 +367,10 @@ func GetAvailableResources(clientBuilder controller.ControllerClientBuilder) (ma

resourceMap, err := discoveryClient.ServerResources()
if err != nil {
return nil, fmt.Errorf("failed to get supported resources from server: %v", err)
utilruntime.HandleError(fmt.Errorf("unable to get all supported resources from server: %v", err))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HandleError only logs here? Does this fix the issue where if I have two versions of a TPR, the cm crash loops?

cc @caesarxuchao

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HandleError only logs here?

The choice is up to the admin I suppose, there's an env var to force it either way. Returning an error unconditionally fails, so this at least gives them the choice.

Does this fix the issue where if I have two versions of a TPR, the cm crash loops?

I'm not familiar with this. Got an issue handy?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This means that we continue, go into the if clause below and return an error. What have we won with this change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This means that we continue, go into the if clause below and return an error. What have we won with this change?

it can return an error and a the results it was able to get. Consider the aggregated case with one server of ten being down.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The TPR issue @mikedanese mentioned is #22768 (comment). If user created multiple versions of a TPR, only the first version is operational, so the discovery here fails and causes controller manager to crashloop.

}
if len(resourceMap) == 0 {
return nil, fmt.Errorf("unable to get any supported resources from server")
}

allResources := map[schema.GroupVersionResource]bool{}
Expand Down
6 changes: 5 additions & 1 deletion cmd/kube-controller-manager/app/core.go
Expand Up @@ -31,6 +31,7 @@ import (
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/runtime/serializer"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/discovery"
"k8s.io/client-go/dynamic"
Expand Down Expand Up @@ -303,7 +304,10 @@ func startGarbageCollectorController(ctx ControllerContext) (bool, error) {
gcClientset := ctx.ClientBuilder.ClientOrDie("generic-garbage-collector")
preferredResources, err := gcClientset.Discovery().ServerPreferredResources()
if err != nil {
return true, fmt.Errorf("failed to get supported resources from server: %v", err)
utilruntime.HandleError(fmt.Errorf("unable to get all supported resources from server: %v", err))
}
if len(preferredResources) == 0 {
return true, fmt.Errorf("unable to get any supported resources from server: %v", err)
}
deletableResources := discovery.FilteredBy(discovery.SupportsAllVerbs{Verbs: []string{"get", "list", "watch", "patch", "update", "delete"}}, preferredResources)
deletableGroupVersionResources, err := discovery.GroupVersionResources(deletableResources)
Expand Down
1 change: 1 addition & 0 deletions pkg/controller/namespace/deletion/BUILD
Expand Up @@ -20,6 +20,7 @@ go_library(
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1/unstructured:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/client-go/discovery:go_default_library",
"//vendor/k8s.io/client-go/dynamic:go_default_library",
Expand Down
Expand Up @@ -30,6 +30,7 @@ import (
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/discovery"
"k8s.io/client-go/dynamic"
Expand Down Expand Up @@ -165,7 +166,10 @@ func (d *namespacedResourcesDeleter) initOpCache() {
// TODO(sttts): get rid of opCache and http 405 logic around it and trust discovery info
resources, err := d.discoverResourcesFn()
if err != nil {
glog.Fatalf("Failed to get supported resources: %v", err)
utilruntime.HandleError(fmt.Errorf("unable to get all supported resources from server: %v", err))
}
if len(resources) == 0 {
glog.Fatalf("Unable to get any supported resources from server: %v", err)
}
deletableGroupVersionResources := []schema.GroupVersionResource{}
for _, rl := range resources {
Expand Down
19 changes: 5 additions & 14 deletions staging/src/k8s.io/client-go/discovery/discovery_client.go
Expand Up @@ -187,7 +187,7 @@ func (d *DiscoveryClient) ServerResourcesForGroupVersion(groupVersion string) (r
}

// serverResources returns the supported resources for all groups and versions.
func (d *DiscoveryClient) serverResources(failEarly bool) ([]*metav1.APIResourceList, error) {
func (d *DiscoveryClient) serverResources() ([]*metav1.APIResourceList, error) {
apiGroups, err := d.ServerGroups()
if err != nil {
return nil, err
Expand All @@ -203,9 +203,6 @@ func (d *DiscoveryClient) serverResources(failEarly bool) ([]*metav1.APIResource
if err != nil {
// TODO: maybe restrict this to NotFound errors
failedGroups[gv] = err
if failEarly {
return nil, &ErrGroupDiscoveryFailed{Groups: failedGroups}
}
continue
}

Expand Down Expand Up @@ -249,7 +246,7 @@ func IsGroupDiscoveryFailedError(err error) bool {
}

// serverPreferredResources returns the supported resources with the version preferred by the server.
func (d *DiscoveryClient) serverPreferredResources(failEarly bool) ([]*metav1.APIResourceList, error) {
func (d *DiscoveryClient) serverPreferredResources() ([]*metav1.APIResourceList, error) {
serverGroupList, err := d.ServerGroups()
if err != nil {
return nil, err
Expand All @@ -269,9 +266,6 @@ func (d *DiscoveryClient) serverPreferredResources(failEarly bool) ([]*metav1.AP
if err != nil {
// TODO: maybe restrict this to NotFound errors
failedGroups[groupVersion] = err
if failEarly {
return nil, &ErrGroupDiscoveryFailed{Groups: failedGroups}
}
continue
}

Expand Down Expand Up @@ -316,9 +310,7 @@ func (d *DiscoveryClient) serverPreferredResources(failEarly bool) ([]*metav1.AP
// ServerPreferredResources returns the supported resources with the version preferred by the
// server.
func (d *DiscoveryClient) ServerPreferredResources() ([]*metav1.APIResourceList, error) {
return withRetries(defaultRetries, func(retryEarly bool) ([]*metav1.APIResourceList, error) {
return d.serverPreferredResources(retryEarly)
})
return withRetries(defaultRetries, d.serverPreferredResources)
}

// ServerPreferredNamespacedResources returns the supported namespaced resources with the
Expand Down Expand Up @@ -394,12 +386,11 @@ func (d *DiscoveryClient) OpenAPISchema() (*openapi_v2.Document, error) {
}

// withRetries retries the given recovery function in case the groups supported by the server change after ServerGroup() returns.
func withRetries(maxRetries int, f func(failEarly bool) ([]*metav1.APIResourceList, error)) ([]*metav1.APIResourceList, error) {
func withRetries(maxRetries int, f func() ([]*metav1.APIResourceList, error)) ([]*metav1.APIResourceList, error) {
var result []*metav1.APIResourceList
var err error
for i := 0; i < maxRetries; i++ {
failEarly := i < maxRetries-1
result, err = f(failEarly)
result, err = f()
if err == nil {
return result, nil
}
Expand Down