Commit
Previously, any command output generated from plugins via add_cmd_output and add_journal was collected in full in memory. For example, if a journal was 4GB in size, then 4GB would be read into memory and subsequently written to the final sos archive. This led not only to potentially large archives, but in some cases to a failure to collect data or produce an archive at all, due to memory constraints on the system. This patch adds the ability to use a sizelimit option in both add_cmd_output and add_journal. This limits the collected output from commands or journals to the given limit, bounding both what is read into memory and what is written to the final archive. If not given, sizelimit defaults to --log-size. For journal collection, if no sizelimit is given, then the larger of --log-size or 100MB is used. Resolves: #1120 Signed-off-by: Jake Hunsaker <jhunsake@redhat.com> Signed-off-by: Bryn M. Reeves <bmr@redhat.com>
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,8 +17,10 @@ | |
import errno | ||
import shlex | ||
import glob | ||
import threading | ||
|
||
from contextlib import closing | ||
from collections import deque | ||
|
||
# PYCOMPAT | ||
import six | ||
|
@@ -105,7 +107,7 @@ def is_executable(command): | |
|
||
def sos_get_command_output(command, timeout=300, stderr=False, | ||
chroot=None, chdir=None, env=None, | ||
binary=False): | ||
binary=False, sizelimit=None): | ||
"""Execute a command and return a dictionary of status and output, | ||
optionally changing root or current working directory before | ||
executing command. | ||
|
@@ -147,7 +149,11 @@ def _child_prep_fn(): | |
stderr=STDOUT if stderr else PIPE, | ||
bufsize=-1, env=cmd_env, close_fds=True, | ||
preexec_fn=_child_prep_fn) | ||
stdout, stderr = p.communicate() | ||
|
||
reader = AsyncReader(p.stdout, sizelimit, binary) | ||
stdout = reader.get_contents() | ||
p.poll() | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
bmr-cymru
Member
|
||
|
||
except OSError as e: | ||
if e.errno == errno.ENOENT: | ||
return {'status': 127, 'output': ""} | ||
|
@@ -159,7 +165,7 @@ def _child_prep_fn(): | |
|
||
return { | ||
'status': p.returncode, | ||
'output': stdout if binary else stdout.decode('utf-8', 'ignore') | ||
'output': stdout | ||
} | ||
|
||
|
||
|
@@ -187,6 +193,55 @@ def shell_out(cmd, timeout=30, chroot=None, runat=None): | |
chroot=chroot, chdir=runat)['output'] | ||
|
||
|
||
class AsyncReader(threading.Thread):
    """Read a Popen output pipe on a background thread, retaining at most
    ``sizelimit`` MB of the most recent output.

    Reading on a separate thread lets the child process keep writing
    without filling the pipe buffer (which would deadlock sos), while the
    bounded deque caps how much output is held in memory and ultimately
    written to the archive.
    """

    def __init__(self, channel, sizelimit, binary):
        """
        :param channel: a readable file object (the Popen stdout pipe)
        :param sizelimit: maximum amount of output to retain, in MB;
                          ``None`` (or 0) means unlimited
        :param binary: if True, ``get_contents()`` returns bytes;
                       otherwise output is decoded as UTF-8, ignoring
                       undecodable bytes
        """
        super(AsyncReader, self).__init__()
        self.chan = channel
        self.binary = binary
        self.chunksize = 2048
        slots = None
        if sizelimit:
            sizelimit = sizelimit * 1048576  # convert MB to bytes
            # deque(maxlen=...) requires an int; plain '/' yields a float
            # on Python 3, so use integer (floor) division here.
            slots = sizelimit // self.chunksize
        self.deque = deque(maxlen=slots)
        # Start the reader and wait for it to drain the pipe completely
        # before returning; the caller then collects via get_contents().
        self.start()
        self.join()

    def run(self):
        """Drain the pipe in fixed-size chunks into the bounded deque.

        Once the deque reaches its maxlen, each new append silently
        discards the oldest chunk, so only the most recent ``sizelimit``
        MB of output is kept. Truncation is therefore chunk-granular
        rather than byte-exact.
        """
        try:
            while True:
                chunk = self.chan.read(self.chunksize)
                if not chunk:
                    # Pipe can remain open after output has completed;
                    # an empty read signals EOF.
                    break
                self.deque.append(chunk)
        except (ValueError, IOError):
            # Pipe has closed, meaning command output is done.
            pass

    def get_contents(self):
        """Return the collected output as a str (or bytes if binary)."""
        if not self.binary:
            return ''.join(ln.decode('utf-8', 'ignore') for ln in self.deque)
        return b''.join(self.deque)
|
||
|
||
class ImporterHelper(object): | ||
"""Provides a list of modules that can be imported in a package. | ||
Importable modules are located along the module __path__ list and modules | ||
|
Some comments on this code, brought up at https://bugs.python.org/issue34566:
If the child closed its end of the stdout pipe before exiting, the exit status is not necessarily available yet. A possible solution is to call wait() rather than poll().
If "stderr=PIPE" was passed and the child writes a large amount to stderr, it will fill up the pipe's buffer. Since this change no longer closes or reads that pipe, a deadlock can occur, with the child blocked writing to stderr while the parent reads only stdout. A possible solution is to use stderr=DEVNULL.