Commit
Merge pull request #92743 from liggitt/gc
Fix GC uid races and handling of conflicting ownerReferences
k8s-ci-robot committed Nov 17, 2020
2 parents 6dddea5 + e491c3b commit e1ab99e
Showing 12 changed files with 2,394 additions and 66 deletions.
1 change: 1 addition & 0 deletions cmd/kube-controller-manager/app/core.go
@@ -553,6 +553,7 @@ func startGarbageCollectorController(ctx ControllerContext) (http.Handler, bool,
ignoredResources[schema.GroupResource{Group: r.Group, Resource: r.Resource}] = struct{}{}
}
garbageCollector, err := garbagecollector.NewGarbageCollector(
gcClientset,
metadataClient,
ctx.RESTMapper,
ignoredResources,
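For orientation, the call above now takes a clientset as its first argument; as the garbagecollector.go hunks below show, it is used only to build the event recorder handed to the GraphBuilder. A minimal sketch of the full call under the new signature — the first four arguments come from this hunk, while sharedInformers and informersStarted are placeholders named after the NewGarbageCollector parameters rather than the real call site:

	garbageCollector, err := garbagecollector.NewGarbageCollector(
		gcClientset,       // new first argument: backs the event broadcaster/recorder
		metadataClient,
		ctx.RESTMapper,
		ignoredResources,
		sharedInformers,   // placeholder: the informer factory argument
		informersStarted,  // placeholder: channel closed once informers have started
	)
	if err != nil {
		return nil, true, err // sketch only; the actual error handling is not shown in this hunk
	}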
14 changes: 14 additions & 0 deletions pkg/controller/garbagecollector/BUILD
@@ -20,7 +20,9 @@ go_library(
],
importpath = "k8s.io/kubernetes/pkg/controller/garbagecollector",
deps = [
"//pkg/controller/apis/config/scheme:go_default_library",
"//pkg/controller/garbagecollector/metaonly:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/meta:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
@@ -33,8 +35,10 @@ go_library(
"//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//staging/src/k8s.io/client-go/discovery:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes/typed/core/v1:go_default_library",
"//staging/src/k8s.io/client-go/metadata:go_default_library",
"//staging/src/k8s.io/client-go/tools/cache:go_default_library",
"//staging/src/k8s.io/client-go/tools/record:go_default_library",
"//staging/src/k8s.io/client-go/util/retry:go_default_library",
"//staging/src/k8s.io/client-go/util/workqueue:go_default_library",
"//staging/src/k8s.io/controller-manager/pkg/informerfactory:go_default_library",
@@ -52,15 +56,18 @@ go_test(
srcs = [
"dump_test.go",
"garbagecollector_test.go",
"graph_builder_test.go",
],
embed = [":go_default_library"],
deps = [
"//pkg/api/legacyscheme:go_default_library",
"//pkg/apis/core/install:go_default_library",
"//pkg/controller/garbagecollector/metaonly:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/meta:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/meta/testrestmapper:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/json:go_default_library",
@@ -71,14 +78,21 @@ go_test(
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library",
"//staging/src/k8s.io/client-go/metadata:go_default_library",
"//staging/src/k8s.io/client-go/metadata/fake:go_default_library",
"//staging/src/k8s.io/client-go/metadata/metadatainformer:go_default_library",
"//staging/src/k8s.io/client-go/rest:go_default_library",
"//staging/src/k8s.io/client-go/testing:go_default_library",
"//staging/src/k8s.io/client-go/tools/record:go_default_library",
"//staging/src/k8s.io/client-go/util/workqueue:go_default_library",
"//staging/src/k8s.io/controller-manager/pkg/informerfactory:go_default_library",
"//vendor/github.com/davecgh/go-spew/spew:go_default_library",
"//vendor/github.com/golang/groupcache/lru:go_default_library",
"//vendor/github.com/google/go-cmp/cmp:go_default_library",
"//vendor/github.com/stretchr/testify/assert:go_default_library",
"//vendor/golang.org/x/time/rate:go_default_library",
"//vendor/gonum.org/v1/gonum/graph:go_default_library",
"//vendor/gonum.org/v1/gonum/graph/simple:go_default_library",
"//vendor/k8s.io/utils/pointer:go_default_library",
],
)

91 changes: 76 additions & 15 deletions pkg/controller/garbagecollector/garbagecollector.go
@@ -18,13 +18,15 @@ package garbagecollector

import (
"context"
goerrors "errors"
"fmt"
"reflect"
"sync"
"time"

"k8s.io/klog/v2"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -35,10 +37,14 @@ import (
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/discovery"
clientset "k8s.io/client-go/kubernetes"
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/client-go/metadata"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/workqueue"
"k8s.io/controller-manager/pkg/informerfactory"
"k8s.io/kubernetes/pkg/controller/apis/config/scheme"

// import known versions
_ "k8s.io/client-go/kubernetes"
@@ -67,22 +73,29 @@ type GarbageCollector struct {
attemptToOrphan workqueue.RateLimitingInterface
dependencyGraphBuilder *GraphBuilder
// GC caches the owners that do not exist according to the API server.
absentOwnerCache *UIDCache
absentOwnerCache *ReferenceCache

workerLock sync.RWMutex
}

// NewGarbageCollector creates a new GarbageCollector.
func NewGarbageCollector(
kubeClient clientset.Interface,
metadataClient metadata.Interface,
mapper resettableRESTMapper,
ignoredResources map[schema.GroupResource]struct{},
sharedInformers informerfactory.InformerFactory,
informersStarted <-chan struct{},
) (*GarbageCollector, error) {

eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartStructuredLogging(0)
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})
eventRecorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "garbage-collector-controller"})

attemptToDelete := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "garbage_collector_attempt_to_delete")
attemptToOrphan := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "garbage_collector_attempt_to_orphan")
absentOwnerCache := NewUIDCache(500)
absentOwnerCache := NewReferenceCache(500)
gc := &GarbageCollector{
metadataClient: metadataClient,
restMapper: mapper,
Expand All @@ -91,6 +104,7 @@ func NewGarbageCollector(
absentOwnerCache: absentOwnerCache,
}
gc.dependencyGraphBuilder = &GraphBuilder{
eventRecorder: eventRecorder,
metadataClient: metadataClient,
informersStarted: informersStarted,
restMapper: mapper,
@@ -281,6 +295,10 @@ func (gc *GarbageCollector) runAttemptToDeleteWorker() {
}
}

var enqueuedVirtualDeleteEventErr = goerrors.New("enqueued virtual delete event")

var namespacedOwnerOfClusterScopedObjectErr = goerrors.New("cluster-scoped objects cannot refer to namespaced owners")

func (gc *GarbageCollector) attemptToDeleteWorker() bool {
item, quit := gc.attemptToDelete.Get()
gc.workerLock.RLock()
@@ -294,8 +312,31 @@ func (gc *GarbageCollector) attemptToDeleteWorker() bool {
utilruntime.HandleError(fmt.Errorf("expect *node, got %#v", item))
return true
}

if !n.isObserved() {
nodeFromGraph, existsInGraph := gc.dependencyGraphBuilder.uidToNode.Read(n.identity.UID)
if !existsInGraph {
// this can happen if attemptToDelete loops on a requeued virtual node because attemptToDeleteItem returned an error,
// and in the meantime a deletion of the real object associated with that uid was observed
klog.V(5).Infof("item %s no longer in the graph, skipping attemptToDeleteItem", n)
return true
}
if nodeFromGraph.isObserved() {
// this can happen if attemptToDelete loops on a requeued virtual node because attemptToDeleteItem returned an error,
// and in the meantime the real object associated with that uid was observed
klog.V(5).Infof("item %s no longer virtual in the graph, skipping attemptToDeleteItem on virtual node", n)
return true
}
}

err := gc.attemptToDeleteItem(n)
if err != nil {
if err == enqueuedVirtualDeleteEventErr {
// a virtual event was produced and will be handled by processGraphChanges, no need to requeue this node
return true
} else if err == namespacedOwnerOfClusterScopedObjectErr {
// a cluster-scoped object referring to a namespaced owner is an error that will not resolve on retry, no need to requeue this node
return true
} else if err != nil {
if _, ok := err.(*restMappingError); ok {
// There are at least two ways this can happen:
// 1. The reference is to an object of a custom type that has not yet been
@@ -325,10 +366,20 @@ func (gc *GarbageCollector) attemptToDeleteWorker() bool {
// returns its latest state.
func (gc *GarbageCollector) isDangling(reference metav1.OwnerReference, item *node) (
dangling bool, owner *metav1.PartialObjectMetadata, err error) {
if gc.absentOwnerCache.Has(reference.UID) {

// check for recorded absent cluster-scoped parent
absentOwnerCacheKey := objectReference{OwnerReference: ownerReferenceCoordinates(reference)}
if gc.absentOwnerCache.Has(absentOwnerCacheKey) {
klog.V(5).Infof("according to the absentOwnerCache, object %s's owner %s/%s, %s does not exist", item.identity.UID, reference.APIVersion, reference.Kind, reference.Name)
return true, nil, nil
}
// check for recorded absent namespaced parent
absentOwnerCacheKey.Namespace = item.identity.Namespace
if gc.absentOwnerCache.Has(absentOwnerCacheKey) {
klog.V(5).Infof("according to the absentOwnerCache, object %s's owner %s/%s, %s does not exist in namespace %s", item.identity.UID, reference.APIVersion, reference.Kind, reference.Name, item.identity.Namespace)
return true, nil, nil
}

// TODO: we need to verify the reference resource is supported by the
// system. If it's not a valid resource, the garbage collector should i)
// ignore the reference when decide if the object should be deleted, and
@@ -339,14 +390,24 @@ func (gc *GarbageCollector) isDangling(reference metav1.OwnerReference, item *no
if err != nil {
return false, nil, err
}
if !namespaced {
absentOwnerCacheKey.Namespace = ""
}

if len(item.identity.Namespace) == 0 && namespaced {
// item is a cluster-scoped object referring to a namespace-scoped owner, which is not valid.
// return a marker error, rather than retrying on the lookup failure forever.
klog.V(2).Infof("object %s is cluster-scoped, but refers to a namespaced owner of type %s/%s", item.identity, reference.APIVersion, reference.Kind)
return false, nil, namespacedOwnerOfClusterScopedObjectErr
}

// TODO: It's only necessary to talk to the API server if the owner node
// is a "virtual" node. The local graph could lag behind the real
// status, but in practice, the difference is small.
owner, err = gc.metadataClient.Resource(resource).Namespace(resourceDefaultNamespace(namespaced, item.identity.Namespace)).Get(context.TODO(), reference.Name, metav1.GetOptions{})
switch {
case errors.IsNotFound(err):
gc.absentOwnerCache.Add(reference.UID)
gc.absentOwnerCache.Add(absentOwnerCacheKey)
klog.V(5).Infof("object %s's owner %s/%s, %s is not found", item.identity.UID, reference.APIVersion, reference.Kind, reference.Name)
return true, nil, nil
case err != nil:
Expand All @@ -355,7 +416,7 @@ func (gc *GarbageCollector) isDangling(reference metav1.OwnerReference, item *no

if owner.GetUID() != reference.UID {
klog.V(5).Infof("object %s's owner %s/%s, %s is not found, UID mismatch", item.identity.UID, reference.APIVersion, reference.Kind, reference.Name)
gc.absentOwnerCache.Add(reference.UID)
gc.absentOwnerCache.Add(absentOwnerCacheKey)
return true, nil, nil
}
return false, owner, nil
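
To make the new cache keying concrete, here is an illustrative (not authoritative) sketch of the two keys isDangling probes for a namespaced dependent, say a Pod in namespace "ns1" whose ownerReference is ref. The objectReference shape follows its use above; ownerReferenceCoordinates is defined elsewhere in this PR and is assumed to keep only the reference's identifying fields (APIVersion, Kind, Name, UID):

	// cluster-scoped form of the owner: no namespace on the key
	clusterScopedKey := objectReference{OwnerReference: ownerReferenceCoordinates(ref)}

	// namespaced form: the same coordinates plus the dependent's namespace
	namespacedKey := clusterScopedKey
	namespacedKey.Namespace = "ns1"

	// an owner previously recorded absent under either key short-circuits the API lookup;
	// because the key now carries kind, name, and namespace, owners that merely share a UID
	// (conflicting ownerReferences) no longer collide in the cache
	if gc.absentOwnerCache.Has(clusterScopedKey) || gc.absentOwnerCache.Has(namespacedKey) {
		// treat ref as dangling without contacting the API server
	}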
@@ -400,9 +461,15 @@ func ownerRefsToUIDs(refs []metav1.OwnerReference) []types.UID {
return ret
}

// attemptToDeleteItem looks up the live API object associated with the node,
// and issues a delete IFF the uid matches, the item is not blocked on deleting dependents,
// and all owner references are dangling.
//
// if the API get request returns a NotFound error, or the retrieved item's uid does not match,
// a virtual delete event for the node is enqueued and enqueuedVirtualDeleteEventErr is returned.
func (gc *GarbageCollector) attemptToDeleteItem(item *node) error {
klog.V(2).InfoS("Processing object", "object", klog.KRef(item.identity.Namespace, item.identity.Name),
"objectUID", item.identity.UID, "kind", item.identity.Kind)
"objectUID", item.identity.UID, "kind", item.identity.Kind, "virtual", !item.isObserved())

// "being deleted" is an one-way trip to the final deletion. We'll just wait for the final deletion, and then process the object's dependents.
if item.isBeingDeleted() && !item.isDeletingDependents() {
@@ -420,21 +487,15 @@ func (gc *GarbageCollector) attemptToDeleteItem(item *node) error {
// the virtual node from GraphBuilder.uidToNode.
klog.V(5).Infof("item %v not found, generating a virtual delete event", item.identity)
gc.dependencyGraphBuilder.enqueueVirtualDeleteEvent(item.identity)
// since we're manually inserting a delete event to remove this node,
// we don't need to keep tracking it as a virtual node and requeueing in attemptToDelete
item.markObserved()
return nil
return enqueuedVirtualDeleteEventErr
case err != nil:
return err
}

if latest.GetUID() != item.identity.UID {
klog.V(5).Infof("UID doesn't match, item %v not found, generating a virtual delete event", item.identity)
gc.dependencyGraphBuilder.enqueueVirtualDeleteEvent(item.identity)
// since we're manually inserting a delete event to remove this node,
// we don't need to keep tracking it as a virtual node and requeueing in attemptToDelete
item.markObserved()
return nil
return enqueuedVirtualDeleteEventErr
}

// TODO: attemptToOrphanWorker() routine is similar. Consider merging
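The net effect of the marker errors above: attemptToDeleteItem no longer marks a vanished virtual node as observed itself; it returns enqueuedVirtualDeleteEventErr and leaves graph cleanup to processGraphChanges, while attemptToDeleteWorker simply skips requeueing. A test-style sketch of that contract (virtualNode and t are illustrative names, not part of this commit):

	err := gc.attemptToDeleteItem(virtualNode)
	if err != enqueuedVirtualDeleteEventErr {
		t.Fatalf("expected enqueuedVirtualDeleteEventErr for a vanished object, got %v", err)
	}
	// the node is not marked observed here; the enqueued virtual delete event,
	// once handled by processGraphChanges, removes it from uidToNode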
