-
Notifications
You must be signed in to change notification settings - Fork 363
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WRKLDS-1012: oc adm must-gather: pull gather container logs #1641
Merged
openshift-merge-bot
merged 2 commits into
openshift:master
from
ingvagabund:must-gather-pull-gather-container-logs
Jan 19, 2024
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,6 +47,10 @@ import ( | |
ocmdhelpers "github.com/openshift/oc/pkg/helpers/cmd" | ||
) | ||
|
||
const ( | ||
gatherContainerName = "gather" | ||
) | ||
|
||
var ( | ||
mustGatherLong = templates.LongDesc(` | ||
Launch a pod to gather debugging information. | ||
|
@@ -77,13 +81,13 @@ var ( | |
|
||
volumeUsageCheckerScript = ` | ||
echo "volume percentage checker started....." | ||
while true; do | ||
while true; do | ||
disk_usage=$(du -s "%s" | awk '{print $1}') | ||
disk_space=$(df -P "%s" | awk 'NR==2 {print $2}') | ||
usage_percentage=$(( (disk_usage * 100) / disk_space )) | ||
echo "volume usage percentage $usage_percentage" | ||
if [ "$usage_percentage" -gt "%d" ]; then | ||
echo "Disk usage exceeds the volume percentage of %d for mounted directory. Exiting..." | ||
echo "volume usage percentage $usage_percentage" | ||
if [ "$usage_percentage" -gt "%d" ]; then | ||
echo "Disk usage exceeds the volume percentage of %d for mounted directory. Exiting..." | ||
# kill gathering process in gather container to prevent disk to use more. | ||
pkill --signal SIGKILL -f %s | ||
exit 1 | ||
|
@@ -125,14 +129,15 @@ func NewMustGatherCommand(f kcmdutil.Factory, streams genericiooptions.IOStreams | |
} | ||
|
||
func NewMustGatherOptions(streams genericiooptions.IOStreams) *MustGatherOptions { | ||
return &MustGatherOptions{ | ||
opts := &MustGatherOptions{ | ||
SourceDir: "/must-gather/", | ||
IOStreams: streams, | ||
LogOut: newPrefixWriter(streams.Out, "[must-gather ] OUT"), | ||
RawOut: streams.Out, | ||
Timeout: 10 * time.Minute, | ||
VolumePercentage: 30, | ||
} | ||
opts.LogOut = opts.newPrefixWriter(streams.Out, "[must-gather ] OUT", false) | ||
opts.RawOut = opts.newPrefixWriter(streams.Out, "", false) | ||
return opts | ||
} | ||
|
||
func (o *MustGatherOptions) Complete(f kcmdutil.Factory, cmd *cobra.Command, args []string) error { | ||
|
@@ -206,6 +211,7 @@ func (o *MustGatherOptions) completeImages() error { | |
o.Images = append(o.Images, image) | ||
} | ||
o.log("Using must-gather plug-in image: %s", strings.Join(o.Images, ", ")) | ||
|
||
return nil | ||
} | ||
|
||
|
@@ -275,6 +281,9 @@ type MustGatherOptions struct { | |
LogOut io.Writer | ||
// RawOut is used for printing information we're looking to have copy/pasted into bugs | ||
RawOut io.Writer | ||
|
||
LogWriter *os.File | ||
LogWriterMux sync.Mutex | ||
} | ||
|
||
func (o *MustGatherOptions) Validate() error { | ||
|
@@ -316,6 +325,27 @@ func (o *MustGatherOptions) Validate() error { | |
func (o *MustGatherOptions) Run() error { | ||
var errs []error | ||
|
||
if err := os.MkdirAll(o.DestDir, os.ModePerm); err != nil { | ||
// ensure the errors bubble up to BackupGathering method for display | ||
errs = []error{err} | ||
return err | ||
} | ||
|
||
f, err := os.Create(path.Join(o.DestDir, "must-gather.logs")) | ||
if err != nil { | ||
fmt.Fprintf(os.Stderr, "Unable to write must-gather logs: %v. It is possible the destination directory has not been created yet due to early termination\n", err) | ||
} else { | ||
o.LogWriter = f | ||
o.LogWriterMux.Lock() | ||
// gets invoked in Complete step before must-gather.logs is created | ||
o.LogWriter.WriteString(fmt.Sprintf("[must-gather ] OUT Using must-gather plug-in image: %s\n", strings.Join(o.Images, ", "))) | ||
o.LogWriterMux.Unlock() | ||
|
||
defer func() { | ||
o.LogWriter.Close() | ||
}() | ||
} | ||
|
||
// print at both the beginning and at the end. This information is important enough to be in both spots. | ||
o.PrintBasicClusterState(context.TODO()) | ||
defer func() { | ||
|
@@ -413,11 +443,6 @@ func (o *MustGatherOptions) Run() error { | |
} | ||
|
||
// log timestamps... | ||
if err := os.MkdirAll(o.DestDir, os.ModePerm); err != nil { | ||
// ensure the errors bubble up to BackupGathering method for display | ||
errs = []error{err} | ||
return err | ||
} | ||
if err := o.logTimestamp(); err != nil { | ||
// ensure the errors bubble up to BackupGathering method for display | ||
errs = []error{err} | ||
|
@@ -445,7 +470,7 @@ func (o *MustGatherOptions) Run() error { | |
}() | ||
} | ||
|
||
log := newPodOutLogger(o.Out, pod.Name) | ||
log := o.newPodOutLogger(o.Out, pod.Name) | ||
|
||
// wait for gather container to be running (gather is running) | ||
if err := o.waitForGatherContainerRunning(pod); err != nil { | ||
|
@@ -509,8 +534,8 @@ func (o *MustGatherOptions) Run() error { | |
return errors.NewAggregate(errs) | ||
} | ||
|
||
func newPodOutLogger(out io.Writer, podName string) func(string, ...interface{}) { | ||
writer := newPrefixWriter(out, fmt.Sprintf("[%s] OUT", podName)) | ||
func (o *MustGatherOptions) newPodOutLogger(out io.Writer, podName string) func(string, ...interface{}) { | ||
writer := o.newPrefixWriter(out, fmt.Sprintf("[%s] OUT", podName), false) | ||
return func(format string, a ...interface{}) { | ||
fmt.Fprintf(writer, format+"\n", a...) | ||
} | ||
|
@@ -531,7 +556,7 @@ func (o *MustGatherOptions) logTimestamp() error { | |
|
||
func (o *MustGatherOptions) copyFilesFromPod(pod *corev1.Pod) error { | ||
streams := o.IOStreams | ||
streams.Out = newPrefixWriter(streams.Out, fmt.Sprintf("[%s] OUT", pod.Name)) | ||
streams.Out = o.newPrefixWriter(streams.Out, fmt.Sprintf("[%s] OUT", pod.Name), false) | ||
imageFolder := regexp.MustCompile("[^A-Za-z0-9]+").ReplaceAllString(pod.Status.ContainerStatuses[0].ImageID, "-") | ||
var destDir string | ||
if o.NodeSelector != "" { | ||
|
@@ -542,6 +567,33 @@ func (o *MustGatherOptions) copyFilesFromPod(pod *corev1.Pod) error { | |
if err := os.MkdirAll(destDir, os.ModePerm); err != nil { | ||
return err | ||
} | ||
|
||
var errs []error | ||
|
||
// get must-gather gather container logs | ||
if err := func() error { | ||
dest, err := os.OpenFile(path.Join(destDir, "/gather.logs"), os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0755) | ||
if err != nil { | ||
return err | ||
} | ||
defer dest.Close() | ||
|
||
logOptions := &corev1.PodLogOptions{ | ||
Container: gatherContainerName, | ||
Timestamps: true, | ||
} | ||
readCloser, err := o.Client.CoreV1().Pods(pod.Namespace).GetLogs(pod.Name, logOptions).Stream(context.TODO()) | ||
if err != nil { | ||
return err | ||
} | ||
defer readCloser.Close() | ||
|
||
_, err = io.Copy(dest, readCloser) | ||
return err | ||
}(); err != nil { | ||
errs = append(errs, err) | ||
} | ||
|
||
rsyncOptions := &rsync.RsyncOptions{ | ||
Namespace: pod.Namespace, | ||
Source: &rsync.PathSpec{PodName: pod.Name, Path: path.Clean(o.SourceDir) + "/"}, | ||
|
@@ -559,8 +611,9 @@ func (o *MustGatherOptions) copyFilesFromPod(pod *corev1.Pod) error { | |
klog.V(4).Infof("re-trying rsync after initial failure %v", err) | ||
// re-try copying data before letting it go | ||
err = rsyncOptions.RunRsync() | ||
errs = append(errs, err) | ||
} | ||
return err | ||
return errors.NewAggregate(errs) | ||
} | ||
|
||
func (o *MustGatherOptions) getGatherContainerLogs(pod *corev1.Pod) error { | ||
|
@@ -577,7 +630,7 @@ func (o *MustGatherOptions) getGatherContainerLogs(pod *corev1.Pod) error { | |
Object: pod, | ||
ConsumeRequestFn: logs.DefaultConsumeRequest, | ||
LogsForObject: polymorphichelpers.LogsForObjectFn, | ||
IOStreams: genericiooptions.IOStreams{Out: newPrefixWriter(o.Out, fmt.Sprintf("[%s] POD", pod.Name))}, | ||
IOStreams: genericiooptions.IOStreams{Out: o.newPrefixWriter(o.Out, fmt.Sprintf("[%s] POD", pod.Name), true)}, | ||
} | ||
|
||
for { | ||
|
@@ -597,12 +650,21 @@ func (o *MustGatherOptions) getGatherContainerLogs(pod *corev1.Pod) error { | |
} | ||
} | ||
|
||
func newPrefixWriter(out io.Writer, prefix string) io.Writer { | ||
func (o *MustGatherOptions) newPrefixWriter(out io.Writer, prefix string, ignoreFileWriter bool) io.Writer { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In which circumstance we should pass There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. E.g. when logs are printed for individual must-gather running container logs. |
||
reader, writer := io.Pipe() | ||
scanner := bufio.NewScanner(reader) | ||
if prefix != "" { | ||
prefix = prefix + " " | ||
} | ||
go func() { | ||
for scanner.Scan() { | ||
fmt.Fprintf(out, "%s %s\n", prefix, scanner.Text()) | ||
text := scanner.Text() | ||
if !ignoreFileWriter && o.LogWriter != nil { | ||
o.LogWriterMux.Lock() | ||
o.LogWriter.WriteString(fmt.Sprintf("%s%s\n", prefix, text)) | ||
o.LogWriterMux.Unlock() | ||
} | ||
fmt.Fprintf(out, "%s%s\n", prefix, text) | ||
} | ||
}() | ||
return writer | ||
|
@@ -625,7 +687,7 @@ func (o *MustGatherOptions) isGatherDone(pod *corev1.Pod) (bool, error) { | |
} | ||
var state *corev1.ContainerState | ||
for _, cstate := range pod.Status.ContainerStatuses { | ||
if cstate.Name == "gather" { | ||
if cstate.Name == gatherContainerName { | ||
state = &cstate.State | ||
break | ||
} | ||
|
@@ -803,7 +865,7 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool) *corev1.P | |
}, | ||
Containers: []corev1.Container{ | ||
{ | ||
Name: "gather", | ||
Name: gatherContainerName, | ||
Image: image, | ||
ImagePullPolicy: corev1.PullIfNotPresent, | ||
// always force disk flush to ensure that all data gathered is accessible in the copy container | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What if user passes
--all-images
flag?. Are we handling it also?.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We are. https://github.com/openshift/oc/blob/master/pkg/cli/admin/mustgather/mustgather.go#L208 populates
o.Images
which includes all images. Changes in #1633 extendcompleteImages()
which is invoked before this line.