From 7553631dd6bd26ae9bb41f76e3484a7cde582fe6 Mon Sep 17 00:00:00 2001 From: Mihai IDU Date: Tue, 11 Mar 2025 20:19:03 +0100 Subject: [PATCH 1/3] Filter out unreachable taints from tolerations --- pkg/cli/admin/mustgather/mustgather.go | 158 +++++++++++++++++++++++-- 1 file changed, 149 insertions(+), 9 deletions(-) diff --git a/pkg/cli/admin/mustgather/mustgather.go b/pkg/cli/admin/mustgather/mustgather.go index 8609e86cc2..efbed93047 100644 --- a/pkg/cli/admin/mustgather/mustgather.go +++ b/pkg/cli/admin/mustgather/mustgather.go @@ -51,6 +51,8 @@ import ( const ( gatherContainerName = "gather" + unreachableTaintKey = "node.kubernetes.io/unreachable" + notReadyTaintKey = "node.kubernetes.io/not-ready" ) var ( @@ -98,6 +100,44 @@ sleep 5 done` ) +var ( + tolerationNotReady = corev1.Toleration{ + Key: "node.kubernetes.io/not-ready", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoSchedule, + } + + tolerationMasterNoSchedule = corev1.Toleration{ + Key: "node-role.kubernetes.io/master", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoSchedule, + } +) + +// buildNodeSelector builds a node selector from the provided node names. +func buildNodeAffinity(nodeNames []string) *corev1.Affinity { + if len(nodeNames) == 0 { + return nil + } + return &corev1.Affinity{ + NodeAffinity: &corev1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ + NodeSelectorTerms: []corev1.NodeSelectorTerm{ + { + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: "kubernetes.io/hostname", + Operator: corev1.NodeSelectorOpIn, + Values: nodeNames, + }, + }, + }, + }, + }, + }, + } +} + const ( // number of concurrent must-gather Pods to run if --all-images or multiple --image are provided concurrentMG = 4 @@ -403,6 +443,42 @@ func (o *MustGatherOptions) Validate() error { return nil } +// prioritizeHealthyNodes returns a preferred node to run the must-gather pod on, and a fallback node if no preferred node is found. +func prioritizeHealthyNodes(nodes *corev1.NodeList) (preferred *corev1.Node, fallback *corev1.Node) { + var fallbackNode *corev1.Node + var latestHeartbeat time.Time + + for _, node := range nodes.Items { + var hasUnhealthyTaint bool + for _, taint := range node.Spec.Taints { + if taint.Key == unreachableTaintKey || taint.Key == notReadyTaintKey { + hasUnhealthyTaint = true + break + } + } + + // Look at heartbeat time for readiness hint + for _, cond := range node.Status.Conditions { + if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue { + // Check if heartbeat is recent (less than 2m old) + if time.Since(cond.LastHeartbeatTime.Time) < 2*time.Minute { + if !hasUnhealthyTaint { + return &node, nil // return immediately on good candidate + } + } + break + } + } + + if fallbackNode == nil || node.CreationTimestamp.Time.After(latestHeartbeat) { + fallbackNode = &node + latestHeartbeat = node.CreationTimestamp.Time + } + } + + return nil, fallbackNode +} + // Run creates and runs a must-gather pod func (o *MustGatherOptions) Run() error { var errs []error @@ -459,15 +535,52 @@ func (o *MustGatherOptions) Run() error { defer cleanupNamespace() } - // Prefer to run in master if there's any but don't be explicit otherwise. - // This enables the command to run by default in hypershift where there's no masters. 
- nodes, err := o.Client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{ - LabelSelector: o.NodeSelector, - }) + nodes, err := o.Client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) if err != nil { return err } - var hasMaster bool + + var cpNodes, workerNodes, unschedulableNodes []corev1.Node + for _, node := range nodes.Items { + isReady := false + for _, cond := range node.Status.Conditions { + if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue { + isReady = true + break + } + } + if !isReady { + continue + } + if node.Spec.Unschedulable { + unschedulableNodes = append(unschedulableNodes, node) + continue + } + if _, ok := node.Labels["node-role.kubernetes.io/control-plane"]; ok { + cpNodes = append(cpNodes, node) + } else { + workerNodes = append(workerNodes, node) + } + } + + selectedNodes := cpNodes + if len(selectedNodes) == 0 { + selectedNodes = workerNodes + } + if len(selectedNodes) == 0 { + selectedNodes = unschedulableNodes + } + + var nodeNames []string + for _, n := range selectedNodes { + nodeNames = append(nodeNames, n.Name) + } + + affinity := buildNodeAffinity(nodeNames) + // If we have no nodes, we cannot run the must-gather pod. + + // Determine if there is a master node + hasMaster := false for _, node := range nodes.Items { if _, ok := node.Labels["node-role.kubernetes.io/master"]; ok { hasMaster = true @@ -496,7 +609,7 @@ func (o *MustGatherOptions) Run() error { return err } for _, node := range nodes.Items { - pods = append(pods, o.newPod(node.Name, image, hasMaster)) + pods = append(pods, o.newPod(node.Name, image, hasMaster, affinity)) } } else { if o.NodeName != "" { @@ -506,7 +619,7 @@ func (o *MustGatherOptions) Run() error { return err } } - pods = append(pods, o.newPod(o.NodeName, image, hasMaster)) + pods = append(pods, o.newPod(o.NodeName, image, hasMaster, affinity)) } } @@ -923,7 +1036,8 @@ func newClusterRoleBinding(ns string) *rbacv1.ClusterRoleBinding { // newPod creates a pod with 2 containers with a shared volume mount: // - gather: init containers that run gather command // - copy: no-op container we can exec into -func (o *MustGatherOptions) newPod(node, image string, hasMaster bool) *corev1.Pod { +func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity *corev1.Affinity) *corev1.Pod { + zero := int64(0) nodeSelector := map[string]string{ @@ -941,6 +1055,31 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool) *corev1.P cleanedSourceDir := path.Clean(o.SourceDir) volumeUsageChecker := fmt.Sprintf(volumeUsageCheckerScript, cleanedSourceDir, cleanedSourceDir, o.VolumePercentage, o.VolumePercentage, executedCommand) + // Define taints we do not want to tolerate (can be extended with user input in the future) + excludedTaints := []corev1.Taint{ + {Key: unreachableTaintKey, Effect: corev1.TaintEffectNoExecute}, + {Key: notReadyTaintKey, Effect: corev1.TaintEffectNoSchedule}, + } + + // Define candidate tolerations we may want to apply + candidateTolerations := []corev1.Toleration{tolerationNotReady} + if node == "" && hasMaster { + candidateTolerations = append(candidateTolerations, tolerationMasterNoSchedule) + } + + // Build the toleration list by only adding tolerations that do NOT tolerate excluded taints + filteredTolerations := make([]corev1.Toleration, 0, len(candidateTolerations)) +TolerationLoop: + for _, tol := range candidateTolerations { + for _, excluded := range excludedTaints { + if tol.ToleratesTaint(&excluded) { + // Skip this toleration if it 
tolerates an excluded taint + continue TolerationLoop + } + } + filteredTolerations = append(filteredTolerations, tol) + } + ret := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ GenerateName: "must-gather-", @@ -1023,6 +1162,7 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool) *corev1.P Operator: "Exists", }, }, + Affinity: affinity, }, } if o.HostNetwork { From 74c5a47957c778969c0b64e315c7433d160c1944 Mon Sep 17 00:00:00 2001 From: Jan Chaloupka Date: Tue, 15 Jul 2025 15:07:55 +0200 Subject: [PATCH 2/3] Prefer ready and reachable control plane nodes --- pkg/cli/admin/mustgather/mustgather.go | 240 ++++++++++---------- pkg/cli/admin/mustgather/mustgather_test.go | 200 ++++++++++++++++ 2 files changed, 326 insertions(+), 114 deletions(-) diff --git a/pkg/cli/admin/mustgather/mustgather.go b/pkg/cli/admin/mustgather/mustgather.go index efbed93047..0a52ce9038 100644 --- a/pkg/cli/admin/mustgather/mustgather.go +++ b/pkg/cli/admin/mustgather/mustgather.go @@ -9,6 +9,7 @@ import ( "os" "path" "regexp" + "sort" "strconv" "strings" "sync" @@ -38,6 +39,7 @@ import ( "k8s.io/kubectl/pkg/util/templates" admissionapi "k8s.io/pod-security-admission/api" "k8s.io/utils/exec" + utilptr "k8s.io/utils/ptr" configclient "github.com/openshift/client-go/config/clientset/versioned" imagev1client "github.com/openshift/client-go/image/clientset/versioned/typed/image/v1" @@ -50,9 +52,10 @@ import ( ) const ( - gatherContainerName = "gather" - unreachableTaintKey = "node.kubernetes.io/unreachable" - notReadyTaintKey = "node.kubernetes.io/not-ready" + gatherContainerName = "gather" + unreachableTaintKey = "node.kubernetes.io/unreachable" + notReadyTaintKey = "node.kubernetes.io/not-ready" + controlPlaneNodeRoleLabel = "node-role.kubernetes.io/control-plane" ) var ( @@ -100,23 +103,9 @@ sleep 5 done` ) -var ( - tolerationNotReady = corev1.Toleration{ - Key: "node.kubernetes.io/not-ready", - Operator: corev1.TolerationOpExists, - Effect: corev1.TaintEffectNoSchedule, - } - - tolerationMasterNoSchedule = corev1.Toleration{ - Key: "node-role.kubernetes.io/master", - Operator: corev1.TolerationOpExists, - Effect: corev1.TaintEffectNoSchedule, - } -) - -// buildNodeSelector builds a node selector from the provided node names. -func buildNodeAffinity(nodeNames []string) *corev1.Affinity { - if len(nodeNames) == 0 { +// buildNodeAffinity builds a node affinity from the provided node hostnames. +func buildNodeAffinity(nodeHostnames []string) *corev1.Affinity { + if len(nodeHostnames) == 0 { return nil } return &corev1.Affinity{ @@ -128,7 +117,7 @@ func buildNodeAffinity(nodeNames []string) *corev1.Affinity { { Key: "kubernetes.io/hostname", Operator: corev1.NodeSelectorOpIn, - Values: nodeNames, + Values: nodeHostnames, }, }, }, @@ -443,40 +432,123 @@ func (o *MustGatherOptions) Validate() error { return nil } -// prioritizeHealthyNodes returns a preferred node to run the must-gather pod on, and a fallback node if no preferred node is found. 
-func prioritizeHealthyNodes(nodes *corev1.NodeList) (preferred *corev1.Node, fallback *corev1.Node) { - var fallbackNode *corev1.Node - var latestHeartbeat time.Time +func getNodeLastHeartbeatTime(node corev1.Node) *metav1.Time { + for _, cond := range node.Status.Conditions { + if cond.Type == corev1.NodeReady { + if !cond.LastHeartbeatTime.IsZero() { + return utilptr.To[metav1.Time](cond.LastHeartbeatTime) + } + return nil + } + } + return nil +} + +func isNodeReadyByCondition(node corev1.Node) bool { + for _, cond := range node.Status.Conditions { + if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue { + return true + } + } + return false +} +func isNodeReadyAndReachableByTaint(node corev1.Node) bool { + for _, taint := range node.Spec.Taints { + if taint.Key == unreachableTaintKey || taint.Key == notReadyTaintKey { + return false + } + } + return true +} + +// getCandidateNodeNames identifies suitable nodes for constructing a list of +// node names for a node affinity expression. +func getCandidateNodeNames(nodes *corev1.NodeList, hasMaster bool) []string { + // Identify ready and reacheable nodes + var controlPlaneNodes, allControlPlaneNodes, workerNodes, unschedulableNodes, remainingNodes, selectedNodes []corev1.Node for _, node := range nodes.Items { - var hasUnhealthyTaint bool - for _, taint := range node.Spec.Taints { - if taint.Key == unreachableTaintKey || taint.Key == notReadyTaintKey { - hasUnhealthyTaint = true - break - } + if _, ok := node.Labels[controlPlaneNodeRoleLabel]; ok { + allControlPlaneNodes = append(allControlPlaneNodes, node) } + if !isNodeReadyByCondition(node) || !isNodeReadyAndReachableByTaint(node) { + remainingNodes = append(remainingNodes, node) + continue + } + if node.Spec.Unschedulable { + unschedulableNodes = append(unschedulableNodes, node) + continue + } + if _, ok := node.Labels[controlPlaneNodeRoleLabel]; ok { + controlPlaneNodes = append(controlPlaneNodes, node) + } else { + workerNodes = append(workerNodes, node) + } + } - // Look at heartbeat time for readiness hint - for _, cond := range node.Status.Conditions { - if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue { - // Check if heartbeat is recent (less than 2m old) - if time.Since(cond.LastHeartbeatTime.Time) < 2*time.Minute { - if !hasUnhealthyTaint { - return &node, nil // return immediately on good candidate - } - } - break - } + // INFO(ingvagabund): a single target node should be enough. Yet, + // it might help to provide more to allow the scheduler choose a more + // suitable node based on the cluster scheduling capabilities. + + // hasMaster cause the must-gather pod to set a node selector targeting all + // control plane node. 
So there's no point of processing other than control + // plane nodes + if hasMaster { + if len(controlPlaneNodes) > 0 { + selectedNodes = controlPlaneNodes + } else { + selectedNodes = allControlPlaneNodes + } + } else { + // For hypershift case + + // Order of preference: + // - ready and reachable control plane nodes first + // - then ready and reachable worker nodes + // - then ready and reachable unschedulable nodes + // - then any other node + selectedNodes = controlPlaneNodes // this will be very likely empty for hypershift + if len(selectedNodes) == 0 { + selectedNodes = workerNodes + } + if len(selectedNodes) == 0 { + // unschedulable nodes might still be better candidates than the remaining + // not ready and not reacheable nodes + selectedNodes = unschedulableNodes + } + if len(selectedNodes) == 0 { + // whatever is left for the last resort + selectedNodes = remainingNodes } + } - if fallbackNode == nil || node.CreationTimestamp.Time.After(latestHeartbeat) { - fallbackNode = &node - latestHeartbeat = node.CreationTimestamp.Time + // Sort nodes based on the cond.LastHeartbeatTime.Time to prefer nodes that + // reported their status most recently + sort.SliceStable(selectedNodes, func(i, j int) bool { + iTime := getNodeLastHeartbeatTime(selectedNodes[i]) + jTime := getNodeLastHeartbeatTime(selectedNodes[j]) + // iTime has no effect since all is sorted right + if jTime == nil { + return true + } + if iTime == nil { + return false + } + return jTime.Before(iTime) + }) + + // Limit the number of nodes to 10 at most to provide a sane list of nodes + nodeNames := []string{} + var nodeNamesSize = 0 + for _, n := range selectedNodes { + nodeNames = append(nodeNames, n.Name) + nodeNamesSize++ + if nodeNamesSize >= 10 { + break } } - return nil, fallbackNode + return nodeNames } // Run creates and runs a must-gather pod @@ -535,52 +607,15 @@ func (o *MustGatherOptions) Run() error { defer cleanupNamespace() } - nodes, err := o.Client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) + // Prefer to run in master if there's any but don't be explicit otherwise. + // This enables the command to run by default in hypershift where there's no masters. + nodes, err := o.Client.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{ + LabelSelector: o.NodeSelector, + }) if err != nil { return err } - - var cpNodes, workerNodes, unschedulableNodes []corev1.Node - for _, node := range nodes.Items { - isReady := false - for _, cond := range node.Status.Conditions { - if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue { - isReady = true - break - } - } - if !isReady { - continue - } - if node.Spec.Unschedulable { - unschedulableNodes = append(unschedulableNodes, node) - continue - } - if _, ok := node.Labels["node-role.kubernetes.io/control-plane"]; ok { - cpNodes = append(cpNodes, node) - } else { - workerNodes = append(workerNodes, node) - } - } - - selectedNodes := cpNodes - if len(selectedNodes) == 0 { - selectedNodes = workerNodes - } - if len(selectedNodes) == 0 { - selectedNodes = unschedulableNodes - } - - var nodeNames []string - for _, n := range selectedNodes { - nodeNames = append(nodeNames, n.Name) - } - - affinity := buildNodeAffinity(nodeNames) - // If we have no nodes, we cannot run the must-gather pod. 
- - // Determine if there is a master node - hasMaster := false + var hasMaster bool for _, node := range nodes.Items { if _, ok := node.Labels["node-role.kubernetes.io/master"]; ok { hasMaster = true @@ -588,6 +623,8 @@ func (o *MustGatherOptions) Run() error { } } + affinity := buildNodeAffinity(getCandidateNodeNames(nodes, hasMaster)) + // ... and create must-gather pod(s) var pods []*corev1.Pod for _, image := range o.Images { @@ -1055,31 +1092,6 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity cleanedSourceDir := path.Clean(o.SourceDir) volumeUsageChecker := fmt.Sprintf(volumeUsageCheckerScript, cleanedSourceDir, cleanedSourceDir, o.VolumePercentage, o.VolumePercentage, executedCommand) - // Define taints we do not want to tolerate (can be extended with user input in the future) - excludedTaints := []corev1.Taint{ - {Key: unreachableTaintKey, Effect: corev1.TaintEffectNoExecute}, - {Key: notReadyTaintKey, Effect: corev1.TaintEffectNoSchedule}, - } - - // Define candidate tolerations we may want to apply - candidateTolerations := []corev1.Toleration{tolerationNotReady} - if node == "" && hasMaster { - candidateTolerations = append(candidateTolerations, tolerationMasterNoSchedule) - } - - // Build the toleration list by only adding tolerations that do NOT tolerate excluded taints - filteredTolerations := make([]corev1.Toleration, 0, len(candidateTolerations)) -TolerationLoop: - for _, tol := range candidateTolerations { - for _, excluded := range excludedTaints { - if tol.ToleratesTaint(&excluded) { - // Skip this toleration if it tolerates an excluded taint - continue TolerationLoop - } - } - filteredTolerations = append(filteredTolerations, tol) - } - ret := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ GenerateName: "must-gather-", diff --git a/pkg/cli/admin/mustgather/mustgather_test.go b/pkg/cli/admin/mustgather/mustgather_test.go index f61ba015ee..768d6dde9d 100644 --- a/pkg/cli/admin/mustgather/mustgather_test.go +++ b/pkg/cli/admin/mustgather/mustgather_test.go @@ -2,8 +2,10 @@ package mustgather import ( "context" + "fmt" "reflect" "testing" + "time" corev1 "k8s.io/api/core/v1" k8sapierrors "k8s.io/apimachinery/pkg/api/errors" @@ -239,3 +241,201 @@ func TestGetNamespace(t *testing.T) { }) } } + +func buildTestNode(name string, apply func(*corev1.Node)) *corev1.Node { + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + SelfLink: fmt.Sprintf("/api/v1/nodes/%s", name), + Labels: map[string]string{}, + }, + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionTrue}, + }, + }, + } + if apply != nil { + apply(node) + } + return node +} + +func buildTestControlPlaneNode(name string, apply func(*corev1.Node)) *corev1.Node { + node := buildTestNode(name, apply) + node.ObjectMeta.Labels[controlPlaneNodeRoleLabel] = "" + return node +} + +func TestGetCandidateNodeNames(t *testing.T) { + tests := []struct { + description string + nodeList *corev1.NodeList + hasMaster bool + nodeNames []string + }{ + { + description: "No nodes are ready and reacheable (hasMaster=true)", + nodeList: &corev1.NodeList{}, + hasMaster: true, + nodeNames: []string{}, + }, + { + description: "No nodes are ready and reacheable (hasMaster=false)", + nodeList: &corev1.NodeList{}, + hasMaster: false, + nodeNames: []string{}, + }, + { + description: "Control plane nodes are ready and reacheable", + nodeList: &corev1.NodeList{ + Items: []corev1.Node{ + *buildTestControlPlaneNode("controlplane1", nil), + 
*buildTestControlPlaneNode("controlplane2", nil), + }, + }, + hasMaster: true, + nodeNames: []string{"controlplane2", "controlplane1"}, + }, + { + description: "Some control plane nodes are not ready or reacheable", + nodeList: &corev1.NodeList{ + Items: []corev1.Node{ + *buildTestControlPlaneNode("controlplane1", func(node *corev1.Node) { + node.Status.Conditions = []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionFalse}, + } + }), + *buildTestControlPlaneNode("controlplane2", nil), + *buildTestControlPlaneNode("controlplane3", func(node *corev1.Node) { + node.Spec.Taints = []corev1.Taint{ + { + Key: unreachableTaintKey, + }, + } + }), + *buildTestControlPlaneNode("controlplane4", func(node *corev1.Node) { + node.Spec.Taints = []corev1.Taint{ + { + Key: notReadyTaintKey, + }, + } + }), + }, + }, + hasMaster: true, + nodeNames: []string{"controlplane2"}, + }, + { + description: "Mix of control plane and worker nodes (at least one reachable and ready control plane node)", + nodeList: &corev1.NodeList{ + Items: []corev1.Node{ + *buildTestControlPlaneNode("controlplane1", func(node *corev1.Node) { + node.Status.Conditions = []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionFalse}, + } + }), + *buildTestControlPlaneNode("controlplane2", nil), + *buildTestNode("controlplane3", nil), + }, + }, + hasMaster: true, + nodeNames: []string{"controlplane2"}, + }, + { + description: "Mix of control plane and worker nodes (no ready and reachable control plane node)", + nodeList: &corev1.NodeList{ + Items: []corev1.Node{ + *buildTestControlPlaneNode("controlplane1", func(node *corev1.Node) { + node.Status.Conditions = []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionFalse}, + } + }), + *buildTestControlPlaneNode("controlplane2", func(node *corev1.Node) { + node.Spec.Taints = []corev1.Taint{ + { + Key: unreachableTaintKey, + }, + } + }), + *buildTestControlPlaneNode("controlplane3", func(node *corev1.Node) { + node.Spec.Taints = []corev1.Taint{ + { + Key: notReadyTaintKey, + }, + } + }), + *buildTestControlPlaneNode("worker1", nil), + }, + }, + hasMaster: true, + nodeNames: []string{"worker1"}, + }, + { + description: "Mix of control plane and worker nodes (no ready and not reachable nodes with unschedulable)", + nodeList: &corev1.NodeList{ + Items: []corev1.Node{ + *buildTestControlPlaneNode("controlplane1", func(node *corev1.Node) { + node.Status.Conditions = []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionFalse}, + } + }), + *buildTestControlPlaneNode("controlplane2", func(node *corev1.Node) { + node.Spec.Taints = []corev1.Taint{ + { + Key: unreachableTaintKey, + }, + } + }), + *buildTestControlPlaneNode("controlplane3", func(node *corev1.Node) { + node.Spec.Taints = []corev1.Taint{ + { + Key: notReadyTaintKey, + }, + } + }), + *buildTestControlPlaneNode("worker1", func(node *corev1.Node) { + node.Spec.Unschedulable = true + }), + }, + }, + nodeNames: []string{"worker1"}, + }, + { + description: "No ready and reachable nodes (nodes with the recent teartbeat time are sorted left)", + nodeList: &corev1.NodeList{ + Items: []corev1.Node{ + *buildTestControlPlaneNode("controlplane1", func(node *corev1.Node) { + node.Status.Conditions = []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionFalse}, + } + }), + *buildTestControlPlaneNode("controlplane2", func(node *corev1.Node) { + node.Status.Conditions = []corev1.NodeCondition{} + }), + *buildTestNode("worker1", func(node *corev1.Node) { 
+ node.Status.Conditions = []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionFalse, LastHeartbeatTime: metav1.Time{Time: time.Date(2025, time.July, 10, 10, 20, 0, 0, time.UTC)}}, + } + }), + *buildTestNode("other1", func(node *corev1.Node) { + node.Status.Conditions = []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionFalse, LastHeartbeatTime: metav1.Time{Time: time.Date(2025, time.July, 10, 10, 30, 0, 0, time.UTC)}}, + } + }), + }, + }, + hasMaster: false, + nodeNames: []string{"other1", "worker1", "controlplane2", "controlplane1"}, + }, + } + + for _, test := range tests { + t.Run(test.description, func(t *testing.T) { + names := getCandidateNodeNames(test.nodeList, test.hasMaster) + if !reflect.DeepEqual(test.nodeNames, names) { + t.Fatalf("Expected and computed list of node names differ. Expected %#v. Got %#v.", test.nodeNames, names) + } + }) + } +} From 4dfb69be48b246ff2200e7b268f23e2d8b5223a3 Mon Sep 17 00:00:00 2001 From: Jan Chaloupka Date: Fri, 7 Nov 2025 14:20:30 +0100 Subject: [PATCH 3/3] fix(must-gather): do not set node affinity if nodename is set --- pkg/cli/admin/mustgather/mustgather.go | 85 +++++++---- pkg/cli/admin/mustgather/mustgather_test.go | 152 ++++++++++++++++++++ 2 files changed, 209 insertions(+), 28 deletions(-) diff --git a/pkg/cli/admin/mustgather/mustgather.go b/pkg/cli/admin/mustgather/mustgather.go index 0a52ce9038..d87ed1acda 100644 --- a/pkg/cli/admin/mustgather/mustgather.go +++ b/pkg/cli/admin/mustgather/mustgather.go @@ -56,6 +56,10 @@ const ( unreachableTaintKey = "node.kubernetes.io/unreachable" notReadyTaintKey = "node.kubernetes.io/not-ready" controlPlaneNodeRoleLabel = "node-role.kubernetes.io/control-plane" + masterNodeRoleLabel = "node-role.kubernetes.io/master" + defaultMustGatherCommand = "/usr/bin/gather" + defaultVolumePercentage = 30 + defaultSourceDir = "/must-gather/" ) var ( @@ -169,10 +173,10 @@ func NewMustGatherCommand(f kcmdutil.Factory, streams genericiooptions.IOStreams func NewMustGatherOptions(streams genericiooptions.IOStreams) *MustGatherOptions { opts := &MustGatherOptions{ - SourceDir: "/must-gather/", + SourceDir: defaultSourceDir, IOStreams: streams, Timeout: 10 * time.Minute, - VolumePercentage: 30, + VolumePercentage: defaultVolumePercentage, } opts.LogOut = opts.newPrefixWriter(streams.Out, "[must-gather ] OUT", false, true) opts.RawOut = opts.newPrefixWriter(streams.Out, "", false, false) @@ -617,7 +621,7 @@ func (o *MustGatherOptions) Run() error { } var hasMaster bool for _, node := range nodes.Items { - if _, ok := node.Labels["node-role.kubernetes.io/master"]; ok { + if _, ok := node.Labels[masterNodeRoleLabel]; ok { hasMaster = true break } @@ -1070,29 +1074,13 @@ func newClusterRoleBinding(ns string) *rbacv1.ClusterRoleBinding { } } -// newPod creates a pod with 2 containers with a shared volume mount: -// - gather: init containers that run gather command -// - copy: no-op container we can exec into -func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity *corev1.Affinity) *corev1.Pod { - +func defaultMustGatherPod(image string) *corev1.Pod { zero := int64(0) - nodeSelector := map[string]string{ - corev1.LabelOSStable: "linux", - } - if node == "" && hasMaster { - nodeSelector["node-role.kubernetes.io/master"] = "" - } - - executedCommand := "/usr/bin/gather" - if len(o.Command) > 0 { - executedCommand = strings.Join(o.Command, " ") - } + cleanedSourceDir := path.Clean(defaultSourceDir) + volumeUsageChecker := 
fmt.Sprintf(volumeUsageCheckerScript, cleanedSourceDir, cleanedSourceDir, defaultVolumePercentage, defaultVolumePercentage, defaultMustGatherCommand) - cleanedSourceDir := path.Clean(o.SourceDir) - volumeUsageChecker := fmt.Sprintf(volumeUsageCheckerScript, cleanedSourceDir, cleanedSourceDir, o.VolumePercentage, o.VolumePercentage, executedCommand) - - ret := &corev1.Pod{ + return &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ GenerateName: "must-gather-", Labels: map[string]string{ @@ -1100,7 +1088,6 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity }, }, Spec: corev1.PodSpec{ - NodeName: node, // This pod is ok to be OOMKilled but not preempted. Following the conventions mentioned at: // https://github.com/openshift/enhancements/blob/master/CONVENTIONS.md#priority-classes // so setting priority class to system-cluster-critical @@ -1120,7 +1107,7 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity Image: image, ImagePullPolicy: corev1.PullIfNotPresent, // always force disk flush to ensure that all data gathered is accessible in the copy container - Command: []string{"/bin/bash", "-c", fmt.Sprintf("%s & %s; sync", volumeUsageChecker, executedCommand)}, + Command: []string{"/bin/bash", "-c", fmt.Sprintf("%s & %s; sync", volumeUsageChecker, defaultMustGatherCommand)}, Env: []corev1.EnvVar{ { Name: "NODE_NAME", @@ -1163,8 +1150,9 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity }, }, }, - HostNetwork: o.HostNetwork, - NodeSelector: nodeSelector, + NodeSelector: map[string]string{ + corev1.LabelOSStable: "linux", + }, TerminationGracePeriodSeconds: &zero, Tolerations: []corev1.Toleration{ { @@ -1174,9 +1162,50 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity Operator: "Exists", }, }, - Affinity: affinity, }, } +} + +// newPod creates a pod with 2 containers with a shared volume mount: +// - gather: init containers that run gather command +// - copy: no-op container we can exec into +func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity *corev1.Affinity) *corev1.Pod { + executedCommand := defaultMustGatherCommand + if len(o.Command) > 0 { + executedCommand = strings.Join(o.Command, " ") + } + + cleanedSourceDir := path.Clean(o.SourceDir) + volumeUsageChecker := fmt.Sprintf(volumeUsageCheckerScript, cleanedSourceDir, cleanedSourceDir, o.VolumePercentage, o.VolumePercentage, executedCommand) + + ret := defaultMustGatherPod(image) + ret.Spec.Containers[0].Command = []string{"/bin/bash", "-c", fmt.Sprintf("%s & %s; sync", volumeUsageChecker, executedCommand)} + ret.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{{ + Name: "must-gather-output", + MountPath: cleanedSourceDir, + ReadOnly: false, + }} + ret.Spec.Containers[1].VolumeMounts = []corev1.VolumeMount{{ + Name: "must-gather-output", + MountPath: cleanedSourceDir, + ReadOnly: false, + }} + ret.Spec.HostNetwork = o.HostNetwork + if node == "" && hasMaster { + ret.Spec.NodeSelector[masterNodeRoleLabel] = "" + } + + if node != "" { + ret.Spec.NodeName = node + } else { + // Set affinity towards the most suitable nodes. E.g. to exclude + // nodes that if unreachable could cause the must-gather pod to stay + // in Pending state indefinitely. Please check getCandidateNodeNames + // function for more details about how the selection of the most + // suitable nodes is performed. 
+ ret.Spec.Affinity = affinity + } + if o.HostNetwork { // If a user specified hostNetwork he might have intended to perform // packet captures on the host, for that we need to set the correct diff --git a/pkg/cli/admin/mustgather/mustgather_test.go b/pkg/cli/admin/mustgather/mustgather_test.go index 768d6dde9d..dda0c0929a 100644 --- a/pkg/cli/admin/mustgather/mustgather_test.go +++ b/pkg/cli/admin/mustgather/mustgather_test.go @@ -3,6 +3,7 @@ package mustgather import ( "context" "fmt" + "path" "reflect" "testing" "time" @@ -19,6 +20,8 @@ import ( "k8s.io/client-go/kubernetes/fake" "k8s.io/utils/diff" + "github.com/google/go-cmp/cmp" + configv1 "github.com/openshift/api/config/v1" imagev1 "github.com/openshift/api/image/v1" configv1fake "github.com/openshift/client-go/config/clientset/versioned/fake" @@ -439,3 +442,152 @@ func TestGetCandidateNodeNames(t *testing.T) { }) } } + +func TestNewPod(t *testing.T) { + generateMustGatherPod := func(image string, update func(pod *corev1.Pod)) *corev1.Pod { + pod := defaultMustGatherPod(image) + update(pod) + return pod + } + + tests := []struct { + name string + options *MustGatherOptions + hasMasters bool + node string + affinity *corev1.Affinity + expectedPod *corev1.Pod + }{ + { + name: "node set, affinity provided, affinity not set", + options: &MustGatherOptions{ + VolumePercentage: defaultVolumePercentage, + SourceDir: defaultSourceDir, + }, + node: "node1", + affinity: buildNodeAffinity([]string{"node2"}), + expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) { + pod.Spec.NodeName = "node1" + }), + }, + { + name: "node not set, affinity provided, affinity set", + options: &MustGatherOptions{ + VolumePercentage: defaultVolumePercentage, + SourceDir: defaultSourceDir, + }, + affinity: buildNodeAffinity([]string{"node2"}), + expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) { + pod.Spec.Affinity = buildNodeAffinity([]string{"node2"}) + }), + }, + { + name: "custom source dir", + options: &MustGatherOptions{ + VolumePercentage: defaultVolumePercentage, + SourceDir: "custom-source-dir", + }, + expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) { + volumeUsageChecker := fmt.Sprintf(volumeUsageCheckerScript, "custom-source-dir", "custom-source-dir", defaultVolumePercentage, defaultVolumePercentage, defaultMustGatherCommand) + podCmd := fmt.Sprintf("%s & %s; sync", volumeUsageChecker, defaultMustGatherCommand) + pod.Spec.Containers[0].Command = []string{"/bin/bash", "-c", podCmd} + pod.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{{ + Name: "must-gather-output", + MountPath: "custom-source-dir", + }} + pod.Spec.Containers[1].VolumeMounts = []corev1.VolumeMount{{ + Name: "must-gather-output", + MountPath: "custom-source-dir", + }} + }), + }, + { + name: "host network", + options: &MustGatherOptions{ + VolumePercentage: defaultVolumePercentage, + SourceDir: defaultSourceDir, + HostNetwork: true, + }, + expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) { + pod.Spec.HostNetwork = true + pod.Spec.Containers[0].SecurityContext = &corev1.SecurityContext{ + Capabilities: &corev1.Capabilities{ + Add: []corev1.Capability{ + corev1.Capability("CAP_NET_RAW"), + }, + }, + } + }), + }, + { + name: "with control plane nodes", + options: &MustGatherOptions{ + VolumePercentage: defaultVolumePercentage, + SourceDir: defaultSourceDir, + }, + hasMasters: true, + expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) { + pod.Spec.NodeSelector[masterNodeRoleLabel] = "" + }), + }, + { + 
name: "with custom command", + options: &MustGatherOptions{ + VolumePercentage: defaultVolumePercentage, + SourceDir: defaultSourceDir, + Command: []string{"custom_command", "with_params"}, + }, + expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) { + cleanSourceDir := path.Clean(defaultSourceDir) + volumeUsageChecker := fmt.Sprintf(volumeUsageCheckerScript, cleanSourceDir, cleanSourceDir, defaultVolumePercentage, defaultVolumePercentage, "custom_command with_params") + podCmd := fmt.Sprintf("%s & %s; sync", volumeUsageChecker, "custom_command with_params") + pod.Spec.Containers[0].Command = []string{"/bin/bash", "-c", podCmd} + pod.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{{ + Name: "must-gather-output", + MountPath: cleanSourceDir, + }} + pod.Spec.Containers[1].VolumeMounts = []corev1.VolumeMount{{ + Name: "must-gather-output", + MountPath: cleanSourceDir, + }} + }), + }, + { + name: "with since", + options: &MustGatherOptions{ + VolumePercentage: defaultVolumePercentage, + SourceDir: defaultSourceDir, + Since: 2 * time.Minute, + }, + expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) { + pod.Spec.Containers[0].Env = append(pod.Spec.Containers[0].Env, corev1.EnvVar{ + Name: "MUST_GATHER_SINCE", + Value: "2m0s", + }) + }), + }, + { + name: "with sincetime", + options: &MustGatherOptions{ + VolumePercentage: defaultVolumePercentage, + SourceDir: defaultSourceDir, + SinceTime: "2023-09-24T15:30:00Z", + }, + expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) { + pod.Spec.Containers[0].Env = append(pod.Spec.Containers[0].Env, corev1.EnvVar{ + Name: "MUST_GATHER_SINCE_TIME", + Value: "2023-09-24T15:30:00Z", + }) + }), + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + tPod := test.options.newPod(test.node, "image", test.hasMasters, test.affinity) + if !cmp.Equal(test.expectedPod, tPod) { + t.Errorf("Unexpected pod command was generated: \n%s\n", cmp.Diff(test.expectedPod, tPod)) + } + }) + } +}