Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automated cherry pick of #94316: Fixed reflector not recovering from "Too large resource #94430

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
23 changes: 22 additions & 1 deletion staging/src/k8s.io/client-go/tools/cache/reflector.go
Expand Up @@ -551,5 +551,26 @@ func isExpiredError(err error) bool {
}

func isTooLargeResourceVersionError(err error) bool {
return apierrors.HasStatusCause(err, metav1.CauseTypeResourceVersionTooLarge)
if apierrors.HasStatusCause(err, metav1.CauseTypeResourceVersionTooLarge) {
return true
}
// In Kubernetes 1.17.0-1.18.5, the api server doesn't set the error status cause to
// metav1.CauseTypeResourceVersionTooLarge to indicate that the requested minimum resource
// version is larger than the largest currently available resource version. To ensure backward
// compatibility with these server versions we also need to detect the error based on the content
// of the error message field.
if !apierrors.IsTimeout(err) {
return false
}
apierr, ok := err.(apierrors.APIStatus)
if !ok || apierr == nil || apierr.Status().Details == nil {
return false
}
for _, cause := range apierr.Status().Details.Causes {
// Matches the message returned by api server 1.17.0-1.18.5 for this error condition
if cause.Message == "Too large resource version" {
return true
}
}
return false
}
18 changes: 13 additions & 5 deletions staging/src/k8s.io/client-go/tools/cache/reflector_test.go
Expand Up @@ -738,9 +738,14 @@ func TestReflectorFullListIfTooLarge(t *testing.T) {
err := apierrors.NewTimeoutError("too large resource version", 1)
err.ErrStatus.Details.Causes = []metav1.StatusCause{{Type: metav1.CauseTypeResourceVersionTooLarge}}
return nil, err
// relist after the initial list (covers the error format used in api server 1.17.0-1.18.5)
case "30":
err := apierrors.NewTimeoutError("too large resource version", 1)
err.ErrStatus.Details.Causes = []metav1.StatusCause{{Message: "Too large resource version"}}
return nil, err
// relist from etcd after "too large" error
case "":
return &v1.PodList{ListMeta: metav1.ListMeta{ResourceVersion: "10"}}, nil
return &v1.PodList{ListMeta: metav1.ListMeta{ResourceVersion: "30"}}, nil
default:
return nil, fmt.Errorf("unexpected List call: %s", options.ResourceVersion)
}
Expand All @@ -759,12 +764,15 @@ func TestReflectorFullListIfTooLarge(t *testing.T) {
// may be synced to a different version and they will never converge.
// TODO: We should use etcd progress-notify feature to avoid this behavior but until this is
// done we simply try to relist from now to avoid continuous errors on relists.
stopCh = make(chan struct{})
if err := r.ListAndWatch(stopCh); err != nil {
t.Fatal(err)
for i := 1; i <= 2; i++ {
// relist twice to cover the two variants of TooLargeResourceVersion api errors
stopCh = make(chan struct{})
if err := r.ListAndWatch(stopCh); err != nil {
t.Fatal(err)
}
}

expectedRVs := []string{"0", "20", ""}
expectedRVs := []string{"0", "20", "", "30", ""}
if !reflect.DeepEqual(listCallRVs, expectedRVs) {
t.Errorf("Expected series of list calls with resource version of %#v but got: %#v", expectedRVs, listCallRVs)
}
Expand Down