Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automated cherry pick of #98068: Ensure invalid cluster-scoped children do not block cleanup of valid namespaced children #98105

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
264 changes: 264 additions & 0 deletions pkg/controller/garbagecollector/garbagecollector_test.go
Expand Up @@ -1956,6 +1956,270 @@ func TestConflictingData(t *testing.T) {
}),
},
},
{
// https://github.com/kubernetes/kubernetes/issues/98040
name: "cluster-scoped bad child, namespaced good child, missing parent",
steps: []step{
// setup
createObjectInClient("", "v1", "pods", "ns1", makeMetadataObj(pod2ns1, pod1ns1)), // good child
createObjectInClient("", "v1", "nodes", "", makeMetadataObj(node1, pod1nonamespace)), // bad child

// 2,3: observe bad child
processEvent(makeAddEvent(node1, pod1nonamespace)),
assertState(state{
graphNodes: []*node{
makeNode(node1, withOwners(pod1nonamespace)),
makeNode(pod1nonamespace, virtual)},
pendingAttemptToDelete: []*node{
makeNode(pod1nonamespace, virtual)}, // virtual parent queued for deletion
}),

// 4,5: observe good child
processEvent(makeAddEvent(pod2ns1, pod1ns1)),
assertState(state{
graphNodes: []*node{
makeNode(node1, withOwners(pod1nonamespace)),
makeNode(pod2ns1, withOwners(pod1ns1)),
makeNode(pod1nonamespace, virtual)},
pendingAttemptToDelete: []*node{
makeNode(pod1nonamespace, virtual), // virtual parent queued for deletion
makeNode(pod2ns1, withOwners(pod1ns1)), // mismatched child queued for deletion
},
}),

// 6,7: process attemptToDelete of bad virtual parent coordinates
processAttemptToDelete(1),
assertState(state{
graphNodes: []*node{
makeNode(node1, withOwners(pod1nonamespace)),
makeNode(pod2ns1, withOwners(pod1ns1)),
makeNode(pod1nonamespace, virtual)},
pendingAttemptToDelete: []*node{
makeNode(pod2ns1, withOwners(pod1ns1))}, // mismatched child queued for deletion
}),

// 8,9: process attemptToDelete of good child
processAttemptToDelete(1),
assertState(state{
clientActions: []string{
"get /v1, Resource=pods ns=ns1 name=podname2", // get good child, returns 200
"get /v1, Resource=pods ns=ns1 name=podname1", // get missing parent, returns 404
"delete /v1, Resource=pods ns=ns1 name=podname2", // delete good child
},
graphNodes: []*node{
makeNode(node1, withOwners(pod1nonamespace)),
makeNode(pod2ns1, withOwners(pod1ns1)),
makeNode(pod1nonamespace, virtual)},
absentOwnerCache: []objectReference{pod1ns1}, // missing parent cached
}),

// 10,11: observe deletion of good child
// steady-state is bad cluster child and bad virtual parent coordinates, with no retries
processEvent(makeDeleteEvent(pod2ns1, pod1ns1)),
assertState(state{
graphNodes: []*node{
makeNode(node1, withOwners(pod1nonamespace)),
makeNode(pod1nonamespace, virtual)},
absentOwnerCache: []objectReference{pod1ns1},
}),
},
},
{
// https://github.com/kubernetes/kubernetes/issues/98040
name: "cluster-scoped bad child, namespaced good child, late observed parent",
steps: []step{
// setup
createObjectInClient("", "v1", "pods", "ns1", makeMetadataObj(pod1ns1)), // good parent
createObjectInClient("", "v1", "pods", "ns1", makeMetadataObj(pod2ns1, pod1ns1)), // good child
createObjectInClient("", "v1", "nodes", "", makeMetadataObj(node1, pod1nonamespace)), // bad child

// 3,4: observe bad child
processEvent(makeAddEvent(node1, pod1nonamespace)),
assertState(state{
graphNodes: []*node{
makeNode(node1, withOwners(pod1nonamespace)),
makeNode(pod1nonamespace, virtual)},
pendingAttemptToDelete: []*node{
makeNode(pod1nonamespace, virtual)}, // virtual parent queued for deletion
}),

// 5,6: observe good child
processEvent(makeAddEvent(pod2ns1, pod1ns1)),
assertState(state{
graphNodes: []*node{
makeNode(node1, withOwners(pod1nonamespace)),
makeNode(pod2ns1, withOwners(pod1ns1)),
makeNode(pod1nonamespace, virtual)},
pendingAttemptToDelete: []*node{
makeNode(pod1nonamespace, virtual), // virtual parent queued for deletion
makeNode(pod2ns1, withOwners(pod1ns1))}, // mismatched child queued for deletion
}),

// 7,8: process attemptToDelete of bad virtual parent coordinates
processAttemptToDelete(1),
assertState(state{
graphNodes: []*node{
makeNode(node1, withOwners(pod1nonamespace)),
makeNode(pod2ns1, withOwners(pod1ns1)),
makeNode(pod1nonamespace, virtual)},
pendingAttemptToDelete: []*node{
makeNode(pod2ns1, withOwners(pod1ns1))}, // mismatched child queued for deletion
}),

// 9,10: process attemptToDelete of good child
processAttemptToDelete(1),
assertState(state{
clientActions: []string{
"get /v1, Resource=pods ns=ns1 name=podname2", // get good child, returns 200
"get /v1, Resource=pods ns=ns1 name=podname1", // get late-observed parent, returns 200
},
graphNodes: []*node{
makeNode(node1, withOwners(pod1nonamespace)),
makeNode(pod2ns1, withOwners(pod1ns1)),
makeNode(pod1nonamespace, virtual)},
}),

// 11,12: late observe good parent
processEvent(makeAddEvent(pod1ns1)),
assertState(state{
graphNodes: []*node{
makeNode(node1, withOwners(pod1nonamespace)),
makeNode(pod2ns1, withOwners(pod1ns1)),
makeNode(pod1ns1)},
// warn about bad node reference
events: []string{`Warning OwnerRefInvalidNamespace ownerRef [v1/Pod, namespace: , name: podname1, uid: poduid1] does not exist in namespace "" involvedObject{kind=Node,apiVersion=v1}`},
pendingAttemptToDelete: []*node{
makeNode(node1, withOwners(pod1nonamespace))}, // queue bad cluster-scoped child for delete attempt
}),

// 13,14: process attemptToDelete of bad child
// steady state is bad cluster-scoped child remaining with no retries, good parent and good child in graph
processAttemptToDelete(1),
assertState(state{
clientActions: []string{
"get /v1, Resource=nodes name=nodename", // get bad child, returns 200
},
graphNodes: []*node{
makeNode(node1, withOwners(pod1nonamespace)),
makeNode(pod2ns1, withOwners(pod1ns1)),
makeNode(pod1ns1)},
}),
},
},
{
// https://github.com/kubernetes/kubernetes/issues/98040
name: "namespaced good child, cluster-scoped bad child, missing parent",
steps: []step{
// setup
createObjectInClient("", "v1", "pods", "ns1", makeMetadataObj(pod2ns1, pod1ns1)), // good child
createObjectInClient("", "v1", "nodes", "", makeMetadataObj(node1, pod1nonamespace)), // bad child

// 2,3: observe good child
processEvent(makeAddEvent(pod2ns1, pod1ns1)),
assertState(state{
graphNodes: []*node{
makeNode(pod2ns1, withOwners(pod1ns1)),
makeNode(pod1ns1, virtual)},
pendingAttemptToDelete: []*node{
makeNode(pod1ns1, virtual)}, // virtual parent queued for deletion
}),

// 4,5: observe bad child
processEvent(makeAddEvent(node1, pod1nonamespace)),
assertState(state{
graphNodes: []*node{
makeNode(pod2ns1, withOwners(pod1ns1)),
makeNode(node1, withOwners(pod1nonamespace)),
makeNode(pod1ns1, virtual)},
pendingAttemptToDelete: []*node{
makeNode(pod1ns1, virtual), // virtual parent queued for deletion
makeNode(node1, withOwners(pod1nonamespace)), // mismatched child queued for deletion
},
}),

// 6,7: process attemptToDelete of good virtual parent coordinates
processAttemptToDelete(1),
assertState(state{
clientActions: []string{
"get /v1, Resource=pods ns=ns1 name=podname1", // lookup of missing parent, returns 404
},
graphNodes: []*node{
makeNode(node1, withOwners(pod1nonamespace)),
makeNode(pod2ns1, withOwners(pod1ns1)),
makeNode(pod1ns1, virtual)},
pendingGraphChanges: []*event{makeVirtualDeleteEvent(pod1ns1)}, // virtual parent not found, queued virtual delete event
pendingAttemptToDelete: []*node{
makeNode(node1, withOwners(pod1nonamespace)), // mismatched child still queued for deletion
},
}),

// 8,9: process attemptToDelete of bad cluster child
processAttemptToDelete(1),
assertState(state{
clientActions: []string{
"get /v1, Resource=nodes name=nodename", // lookup of existing node
},
graphNodes: []*node{
makeNode(node1, withOwners(pod1nonamespace)),
makeNode(pod2ns1, withOwners(pod1ns1)),
makeNode(pod1ns1, virtual)},
pendingGraphChanges: []*event{makeVirtualDeleteEvent(pod1ns1)}, // virtual parent virtual delete event still enqueued
}),

// 10,11: process virtual delete event for good virtual parent coordinates
processPendingGraphChanges(1),
assertState(state{
graphNodes: []*node{
makeNode(node1, withOwners(pod1nonamespace)),
makeNode(pod2ns1, withOwners(pod1ns1)),
makeNode(pod1nonamespace, virtual)}, // missing virtual parent replaced with alternate coordinates, still virtual
absentOwnerCache: []objectReference{pod1ns1}, // cached absence of missing parent
pendingAttemptToDelete: []*node{
makeNode(pod2ns1, withOwners(pod1ns1)), // good child of missing parent enqueued for deletion
makeNode(pod1nonamespace, virtual), // new virtual parent coordinates enqueued for deletion
},
}),

// 12,13: process attemptToDelete of good child
processAttemptToDelete(1),
assertState(state{
clientActions: []string{
"get /v1, Resource=pods ns=ns1 name=podname2", // lookup of good child
"delete /v1, Resource=pods ns=ns1 name=podname2", // delete of good child
},
graphNodes: []*node{
makeNode(node1, withOwners(pod1nonamespace)),
makeNode(pod2ns1, withOwners(pod1ns1)),
makeNode(pod1nonamespace, virtual)},
absentOwnerCache: []objectReference{pod1ns1},
pendingAttemptToDelete: []*node{
makeNode(pod1nonamespace, virtual), // new virtual parent coordinates enqueued for deletion
},
}),

// 14,15: observe deletion of good child
processEvent(makeDeleteEvent(pod2ns1, pod1ns1)),
assertState(state{
graphNodes: []*node{
makeNode(node1, withOwners(pod1nonamespace)),
makeNode(pod1nonamespace, virtual)},
absentOwnerCache: []objectReference{pod1ns1},
pendingAttemptToDelete: []*node{
makeNode(pod1nonamespace, virtual), // new virtual parent coordinates enqueued for deletion
},
}),

// 16,17: process attemptToDelete of bad virtual parent coordinates
// steady-state is bad cluster child and bad virtual parent coordinates, with no retries
processAttemptToDelete(1),
assertState(state{
graphNodes: []*node{
makeNode(node1, withOwners(pod1nonamespace)),
makeNode(pod1nonamespace, virtual)},
absentOwnerCache: []objectReference{pod1ns1},
}),
},
},
}

alwaysStarted := make(chan struct{})
Expand Down
5 changes: 5 additions & 0 deletions pkg/controller/garbagecollector/graph_builder.go
Expand Up @@ -393,6 +393,11 @@ func (gb *GraphBuilder) addDependentToOwners(n *node, owners []metav1.OwnerRefer
klog.V(2).Infof("node %s references an owner %s with coordinates that do not match the observed identity", n.identity, ownerNode.identity)
}
hasPotentiallyInvalidOwnerReference = true
} else if !ownerIsNamespaced && ownerNode.identity.Namespace != n.identity.Namespace && !ownerNode.isObserved() {
// the ownerNode is cluster-scoped and virtual, and does not match the child node's namespace.
// the owner could be a missing instance of a namespaced type incorrectly referenced by a cluster-scoped child (issue #98040).
// enqueue this child to attemptToDelete to verify parent references.
hasPotentiallyInvalidOwnerReference = true
}
}
}
Expand Down