diff --git a/pkg/cli/admin/mustgather/mustgather.go b/pkg/cli/admin/mustgather/mustgather.go
index a7a6b32b27..46f7869b77 100644
--- a/pkg/cli/admin/mustgather/mustgather.go
+++ b/pkg/cli/admin/mustgather/mustgather.go
@@ -56,6 +56,10 @@ const (
 	unreachableTaintKey       = "node.kubernetes.io/unreachable"
 	notReadyTaintKey          = "node.kubernetes.io/not-ready"
 	controlPlaneNodeRoleLabel = "node-role.kubernetes.io/control-plane"
+	masterNodeRoleLabel       = "node-role.kubernetes.io/master"
+	defaultMustGatherCommand  = "/usr/bin/gather"
+	defaultVolumePercentage   = 30
+	defaultSourceDir          = "/must-gather/"
 )
 
 var (
@@ -169,10 +173,10 @@ func NewMustGatherCommand(f kcmdutil.Factory, streams genericiooptions.IOStreams
 
 func NewMustGatherOptions(streams genericiooptions.IOStreams) *MustGatherOptions {
 	opts := &MustGatherOptions{
-		SourceDir:        "/must-gather/",
+		SourceDir:        defaultSourceDir,
 		IOStreams:        streams,
 		Timeout:          10 * time.Minute,
-		VolumePercentage: 30,
+		VolumePercentage: defaultVolumePercentage,
 	}
 	opts.LogOut = opts.newPrefixWriter(streams.Out, "[must-gather ] OUT", false, true)
 	opts.RawOut = opts.newPrefixWriter(streams.Out, "", false, false)
@@ -617,7 +621,7 @@ func (o *MustGatherOptions) Run() error {
 	}
 	var hasMaster bool
 	for _, node := range nodes.Items {
-		if _, ok := node.Labels["node-role.kubernetes.io/master"]; ok {
+		if _, ok := node.Labels[masterNodeRoleLabel]; ok {
 			hasMaster = true
 			break
 		}
@@ -1071,29 +1075,13 @@ func newClusterRoleBinding(ns *corev1.Namespace) *rbacv1.ClusterRoleBinding {
 	}
 }
 
-// newPod creates a pod with 2 containers with a shared volume mount:
-// - gather: init containers that run gather command
-// - copy: no-op container we can exec into
-func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity *corev1.Affinity) *corev1.Pod {
-
+func defaultMustGatherPod(image string) *corev1.Pod {
 	zero := int64(0)
-	nodeSelector := map[string]string{
-		corev1.LabelOSStable: "linux",
-	}
-	if node == "" && hasMaster {
-		nodeSelector["node-role.kubernetes.io/master"] = ""
-	}
-
-	executedCommand := "/usr/bin/gather"
-	if len(o.Command) > 0 {
-		executedCommand = strings.Join(o.Command, " ")
-	}
+	cleanedSourceDir := path.Clean(defaultSourceDir)
+	volumeUsageChecker := fmt.Sprintf(volumeUsageCheckerScript, cleanedSourceDir, cleanedSourceDir, defaultVolumePercentage, defaultVolumePercentage, defaultMustGatherCommand)
 
-	cleanedSourceDir := path.Clean(o.SourceDir)
-	volumeUsageChecker := fmt.Sprintf(volumeUsageCheckerScript, cleanedSourceDir, cleanedSourceDir, o.VolumePercentage, o.VolumePercentage, executedCommand)
-
-	ret := &corev1.Pod{
+	return &corev1.Pod{
 		ObjectMeta: metav1.ObjectMeta{
 			GenerateName: "must-gather-",
 			Labels: map[string]string{
@@ -1101,7 +1089,6 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity
 			},
 		},
 		Spec: corev1.PodSpec{
-			NodeName: node,
 			// This pod is ok to be OOMKilled but not preempted. Following the conventions mentioned at:
 			// https://github.com/openshift/enhancements/blob/master/CONVENTIONS.md#priority-classes
 			// so setting priority class to system-cluster-critical
@@ -1121,7 +1108,7 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity
 					Image:           image,
 					ImagePullPolicy: corev1.PullIfNotPresent,
 					// always force disk flush to ensure that all data gathered is accessible in the copy container
-					Command: []string{"/bin/bash", "-c", fmt.Sprintf("%s & %s; sync", volumeUsageChecker, executedCommand)},
+					Command: []string{"/bin/bash", "-c", fmt.Sprintf("%s & %s; sync", volumeUsageChecker, defaultMustGatherCommand)},
 					Env: []corev1.EnvVar{
 						{
 							Name: "NODE_NAME",
@@ -1164,8 +1151,9 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity
 					},
 				},
 			},
-			HostNetwork:                   o.HostNetwork,
-			NodeSelector:                  nodeSelector,
+			NodeSelector: map[string]string{
+				corev1.LabelOSStable: "linux",
+			},
 			TerminationGracePeriodSeconds: &zero,
 			Tolerations: []corev1.Toleration{
 				{
@@ -1175,9 +1163,50 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity
 					Operator: "Exists",
 				},
 			},
-			Affinity: affinity,
 		},
 	}
+}
+
+// newPod creates a pod with 2 containers with a shared volume mount:
+// - gather: init containers that run gather command
+// - copy: no-op container we can exec into
+func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity *corev1.Affinity) *corev1.Pod {
+	executedCommand := defaultMustGatherCommand
+	if len(o.Command) > 0 {
+		executedCommand = strings.Join(o.Command, " ")
+	}
+
+	cleanedSourceDir := path.Clean(o.SourceDir)
+	volumeUsageChecker := fmt.Sprintf(volumeUsageCheckerScript, cleanedSourceDir, cleanedSourceDir, o.VolumePercentage, o.VolumePercentage, executedCommand)
+
+	ret := defaultMustGatherPod(image)
+	ret.Spec.Containers[0].Command = []string{"/bin/bash", "-c", fmt.Sprintf("%s & %s; sync", volumeUsageChecker, executedCommand)}
+	ret.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{{
+		Name:      "must-gather-output",
+		MountPath: cleanedSourceDir,
+		ReadOnly:  false,
+	}}
+	ret.Spec.Containers[1].VolumeMounts = []corev1.VolumeMount{{
+		Name:      "must-gather-output",
+		MountPath: cleanedSourceDir,
+		ReadOnly:  false,
+	}}
+	ret.Spec.HostNetwork = o.HostNetwork
+	if node == "" && hasMaster {
+		ret.Spec.NodeSelector[masterNodeRoleLabel] = ""
+	}
+
+	if node != "" {
+		ret.Spec.NodeName = node
+	} else {
+		// Set affinity towards the most suitable nodes. E.g. to exclude
+		// nodes that if unreachable could cause the must-gather pod to stay
+		// in Pending state indefinitely. Please check the getCandidateNodeNames
+		// function for more details about how the selection of the most
+		// suitable nodes is performed.
+		ret.Spec.Affinity = affinity
+	}
+
 	if o.HostNetwork {
 		// If a user specified hostNetwork he might have intended to perform
 		// packet captures on the host, for that we need to set the correct
diff --git a/pkg/cli/admin/mustgather/mustgather_test.go b/pkg/cli/admin/mustgather/mustgather_test.go
index db56c50322..ab6470d59f 100644
--- a/pkg/cli/admin/mustgather/mustgather_test.go
+++ b/pkg/cli/admin/mustgather/mustgather_test.go
@@ -3,6 +3,7 @@ package mustgather
 import (
 	"context"
 	"fmt"
+	"path"
 	"reflect"
 	"testing"
 	"time"
@@ -19,6 +20,8 @@ import (
 	"k8s.io/client-go/kubernetes/fake"
 	"k8s.io/utils/diff"
 
+	"github.com/google/go-cmp/cmp"
+
 	configv1 "github.com/openshift/api/config/v1"
 	imagev1 "github.com/openshift/api/image/v1"
 	configv1fake "github.com/openshift/client-go/config/clientset/versioned/fake"
@@ -439,3 +442,152 @@ func TestGetCandidateNodeNames(t *testing.T) {
 		})
 	}
 }
+
+func TestNewPod(t *testing.T) {
+	generateMustGatherPod := func(image string, update func(pod *corev1.Pod)) *corev1.Pod {
+		pod := defaultMustGatherPod(image)
+		update(pod)
+		return pod
+	}
+
+	tests := []struct {
+		name        string
+		options     *MustGatherOptions
+		hasMasters  bool
+		node        string
+		affinity    *corev1.Affinity
+		expectedPod *corev1.Pod
+	}{
+		{
+			name: "node set, affinity provided, affinity not set",
+			options: &MustGatherOptions{
+				VolumePercentage: defaultVolumePercentage,
+				SourceDir:        defaultSourceDir,
+			},
+			node:     "node1",
+			affinity: buildNodeAffinity([]string{"node2"}),
+			expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) {
+				pod.Spec.NodeName = "node1"
+			}),
+		},
+		{
+			name: "node not set, affinity provided, affinity set",
+			options: &MustGatherOptions{
+				VolumePercentage: defaultVolumePercentage,
+				SourceDir:        defaultSourceDir,
+			},
+			affinity: buildNodeAffinity([]string{"node2"}),
+			expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) {
+				pod.Spec.Affinity = buildNodeAffinity([]string{"node2"})
+			}),
+		},
+		{
+			name: "custom source dir",
+			options: &MustGatherOptions{
+				VolumePercentage: defaultVolumePercentage,
+				SourceDir:        "custom-source-dir",
+			},
+			expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) {
+				volumeUsageChecker := fmt.Sprintf(volumeUsageCheckerScript, "custom-source-dir", "custom-source-dir", defaultVolumePercentage, defaultVolumePercentage, defaultMustGatherCommand)
+				podCmd := fmt.Sprintf("%s & %s; sync", volumeUsageChecker, defaultMustGatherCommand)
+				pod.Spec.Containers[0].Command = []string{"/bin/bash", "-c", podCmd}
+				pod.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{{
+					Name:      "must-gather-output",
+					MountPath: "custom-source-dir",
+				}}
+				pod.Spec.Containers[1].VolumeMounts = []corev1.VolumeMount{{
+					Name:      "must-gather-output",
+					MountPath: "custom-source-dir",
+				}}
+			}),
+		},
+		{
+			name: "host network",
+			options: &MustGatherOptions{
+				VolumePercentage: defaultVolumePercentage,
+				SourceDir:        defaultSourceDir,
+				HostNetwork:      true,
+			},
+			expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) {
+				pod.Spec.HostNetwork = true
+				pod.Spec.Containers[0].SecurityContext = &corev1.SecurityContext{
+					Capabilities: &corev1.Capabilities{
+						Add: []corev1.Capability{
+							corev1.Capability("CAP_NET_RAW"),
+						},
+					},
+				}
+			}),
+		},
+		{
+			name: "with control plane nodes",
+			options: &MustGatherOptions{
+				VolumePercentage: defaultVolumePercentage,
+				SourceDir:        defaultSourceDir,
+			},
+			hasMasters: true,
+			expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) {
+				pod.Spec.NodeSelector[masterNodeRoleLabel] = ""
+			}),
+		},
+		{
name: "with custom command", + options: &MustGatherOptions{ + VolumePercentage: defaultVolumePercentage, + SourceDir: defaultSourceDir, + Command: []string{"custom_command", "with_params"}, + }, + expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) { + cleanSourceDir := path.Clean(defaultSourceDir) + volumeUsageChecker := fmt.Sprintf(volumeUsageCheckerScript, cleanSourceDir, cleanSourceDir, defaultVolumePercentage, defaultVolumePercentage, "custom_command with_params") + podCmd := fmt.Sprintf("%s & %s; sync", volumeUsageChecker, "custom_command with_params") + pod.Spec.Containers[0].Command = []string{"/bin/bash", "-c", podCmd} + pod.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{{ + Name: "must-gather-output", + MountPath: cleanSourceDir, + }} + pod.Spec.Containers[1].VolumeMounts = []corev1.VolumeMount{{ + Name: "must-gather-output", + MountPath: cleanSourceDir, + }} + }), + }, + { + name: "with since", + options: &MustGatherOptions{ + VolumePercentage: defaultVolumePercentage, + SourceDir: defaultSourceDir, + Since: 2 * time.Minute, + }, + expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) { + pod.Spec.Containers[0].Env = append(pod.Spec.Containers[0].Env, corev1.EnvVar{ + Name: "MUST_GATHER_SINCE", + Value: "2m0s", + }) + }), + }, + { + name: "with sincetime", + options: &MustGatherOptions{ + VolumePercentage: defaultVolumePercentage, + SourceDir: defaultSourceDir, + SinceTime: "2023-09-24T15:30:00Z", + }, + expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) { + pod.Spec.Containers[0].Env = append(pod.Spec.Containers[0].Env, corev1.EnvVar{ + Name: "MUST_GATHER_SINCE_TIME", + Value: "2023-09-24T15:30:00Z", + }) + }), + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + tPod := test.options.newPod(test.node, "image", test.hasMasters, test.affinity) + if !cmp.Equal(test.expectedPod, tPod) { + t.Errorf("Unexpected pod command was generated: \n%s\n", cmp.Diff(test.expectedPod, tPod)) + } + }) + } +}