Skip to content

Commit

Permalink
Put the logic to sanitize stdout/stderr under a method
Browse files Browse the repository at this point in the history
  • Loading branch information
humitos committed Aug 23, 2018
1 parent 4de3b5e commit 628af29
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 16 deletions.
40 changes: 26 additions & 14 deletions readthedocs/doc_builder/environments.py
Expand Up @@ -159,20 +159,8 @@ def run(self):
cmd_input_bytes = cmd_input
cmd_output = proc.communicate(input=cmd_input_bytes)
(cmd_stdout, cmd_stderr) = cmd_output
try:
self.output = cmd_stdout.decode('utf-8', 'replace')
# Replace NULL (\x00) character to avoid PostgreSQL db to fail
# https://code.djangoproject.com/ticket/28201
self.output = self.output.replace('\x00', '')
except (TypeError, AttributeError):
self.output = None
try:
self.error = cmd_stderr.decode('utf-8', 'replace')
# Replace NULL (\x00) character to avoid PostgreSQL db to fail
# https://code.djangoproject.com/ticket/28201
self.error = self.error.replace('\x00', '')
except (TypeError, AttributeError):
self.error = None
self.output = self.sanitize_output(cmd_stdout)
self.error = self.sanitize_output(cmd_stderr)
self.exit_code = proc.returncode
except OSError:
self.error = traceback.format_exc()
Expand All @@ -181,6 +169,30 @@ def run(self):
finally:
self.end_time = datetime.utcnow()

def sanitize_output(self, output):
    r"""
    Sanitize ``output`` so it can be saved into the DB.

    1. Decodes the bytes as UTF-8, substituting undecodable bytes with the
       Unicode replacement character.
    2. Removes NULL (\x00) characters to keep PostgreSQL from failing:
       https://code.djangoproject.com/ticket/28201

    The docstring is a raw string on purpose: otherwise the ``\x00`` above
    would be interpreted as an escape and embed a real NULL character in it.

    :param output: stdout/stderr to be sanitized
    :type output: bytes
    :returns: sanitized output as string, or ``None`` if ``output`` cannot
        be decoded (for example, when it is ``None``)
    """
    try:
        decoded = output.decode('utf-8', 'replace')
    except (TypeError, AttributeError):
        # ``output`` has no usable ``decode`` (e.g. it is ``None``)
        return None

    # Strip NULL (\x00) characters to avoid PostgreSQL db to fail
    # https://code.djangoproject.com/ticket/28201
    return decoded.replace('\x00', '')

def get_command(self):
"""Flatten command."""
if hasattr(self.command, '__iter__') and not isinstance(self.command, str):
Expand Down
23 changes: 21 additions & 2 deletions readthedocs/rtd_tests/tests/test_doc_building.py
Expand Up @@ -1009,8 +1009,17 @@ def test_input(self):
def test_output(self):
    """Test output command and that its output goes through sanitization."""
    cmd = BuildCommand(['/bin/bash', '-c', 'echo -n FOOBAR'])

    # Mock BuildCommand.sanitize_output just to count the number of calls,
    # but delegate to the original method so it behaves like the real one.
    # The bound method must be captured *before* patching the class.
    original_sanitize_output = cmd.sanitize_output
    with patch('readthedocs.doc_builder.environments.BuildCommand.sanitize_output') as sanitize_output:  # noqa
        sanitize_output.side_effect = original_sanitize_output
        cmd.run()
        self.assertEqual(cmd.output, 'FOOBAR')

        # Check that we sanitize the output (once for stdout, once for stderr)
        self.assertEqual(sanitize_output.call_count, 2)

def test_error_output(self):
"""Test error output from command."""
Expand All @@ -1026,6 +1035,16 @@ def test_error_output(self):
self.assertEqual(cmd.output, '')
self.assertEqual(cmd.error, 'FOOBAR')

def test_sanitize_output(self):
    """Check that NULL characters are stripped from decoded output."""
    cmd = BuildCommand(['/bin/bash', '-c', 'echo'])
    # Pairs of (raw bytes handed to the sanitizer, expected clean string)
    cases = (
        (b'Hola', 'Hola'),
        (b'H\x00i', 'Hi'),
        (b'H\x00i \x00\x00\x00You!\x00', 'Hi You!'),
    )
    for raw, expected in cases:
        result = cmd.sanitize_output(raw)
        self.assertEqual(result, expected)

@patch('subprocess.Popen')
def test_unicode_output(self, mock_subprocess):
"""Unicode output from command."""
Expand Down

0 comments on commit 628af29

Please sign in to comment.