Skip to content

Commit

Permalink
mtest: fix xml chars discouraged to use
Browse files Browse the repository at this point in the history
  • Loading branch information
nbyavuz committed Aug 24, 2022
1 parent f8ebfdf commit fcb3684
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 2 deletions.
28 changes: 26 additions & 2 deletions mesonbuild/mtest.py
Expand Up @@ -61,6 +61,23 @@
# Exit if 3 Ctrl-C's are received within one second
MAX_CTRLC = 3

# Inclusive code point ranges that must not (or should not) appear in XML 1.0
# output: C0 controls other than TAB/LF/CR, DEL and the C1 controls other
# than NEL (U+0085), and the Unicode noncharacters. Characters in these ranges
# are replaced with their printable representation.
unencodable_unichrs = [(0x00, 0x08), (0x0B, 0x0C), (0x0E, 0x1F), (0x7F, 0x84),
                       (0x86, 0x9F), (0xFDD0, 0xFDEF), (0xFFFE, 0xFFFF)]
# The last two code points of each supplementary plane (U+1FFFE/U+1FFFF up to
# U+10FFFE/U+10FFFF) are noncharacters too. No narrow-build guard is needed:
# since Python 3.3 (PEP 393) sys.maxunicode is always 0x10FFFF.
unencodable_unichrs.extend((plane | 0xFFFE, plane | 0xFFFF)
                           for plane in range(0x10000, 0x110000, 0x10000))
# Each range becomes a literal "<low>-<high>" fragment of one character class.
# None of the boundary characters is special inside [...], so no escaping is
# required.
unencodable_ranges = [fr'{chr(low)}-{chr(high)}' for (low, high) in unencodable_unichrs]
unencodable_xml_chars_re = re.compile('([' + ''.join(unencodable_ranges) + '])')

def is_windows() -> bool:
    """Return True when platform.system() names the host as Windows.

    The comparison is case-insensitive.
    """
    return platform.system().lower() == 'windows'
Expand Down Expand Up @@ -1103,14 +1120,21 @@ def parse_res(n: int, name: str, result: str) -> TAPParser.Test:

TestRun.PROTOCOL_TO_CLASS[TestProtocol.RUST] = TestRunRust

def replace_unencodable_xml_chars(original_str: str) -> str:
    """Replace characters that are unsafe in XML output with a printable form.

    Every character matched by ``unencodable_xml_chars_re`` is substituted by
    its ``repr()`` escape sequence (for example, a NUL character becomes the
    four characters ``\\x00``). All other characters are returned unchanged.
    """
    def _as_escape(match: 're.Match[str]') -> str:
        # repr() wraps the escape sequence in quotes; drop the first and last
        # character so that only the escape itself is left.
        quoted = repr(match.group())
        return quoted[1:-1]

    return unencodable_xml_chars_re.sub(_as_escape, original_str)

def decode(stream: T.Union[None, bytes]) -> str:
    """Decode captured process output into text that is safe to emit as XML.

    :param stream: raw bytes, or None when nothing was captured
    :return: '' for None; otherwise the decoded text with every character
             matched by the unencodable-XML-character pattern replaced by its
             printable escape sequence
    """
    if stream is None:
        return ''
    try:
        text = stream.decode('utf-8')
    except UnicodeDecodeError:
        # Not valid UTF-8: fall back to ISO-8859-1, which maps every byte
        # value to a character.
        text = stream.decode('iso-8859-1', errors='ignore')
    # Sanitise on both paths; a bare `return stream.decode(...)` placed ahead
    # of this call would make the replacement unreachable.
    return replace_unencodable_xml_chars(text)

async def read_decode(reader: asyncio.StreamReader,
queue: T.Optional['asyncio.Queue[T.Optional[str]]'],
Expand Down
8 changes: 8 additions & 0 deletions test cases/unit/110 replace unencodable xml chars/meson.build
@@ -0,0 +1,8 @@
# Unit-test fixture: registers script.py as the single test 'main'.
project(
  'replace unencodable xml chars',
  'c',
  # A '>= x.y' constraint describes the required Meson release, which is the
  # job of `meson_version`; `version` would set this project's own version.
  meson_version : '>= 0.55'
)

test_script = find_program('script.py')
test('main', test_script)
28 changes: 28 additions & 0 deletions test cases/unit/110 replace unencodable xml chars/script.py
@@ -0,0 +1,28 @@
#!/usr/bin/env python3

import sys

# "\nHello Meson\n" written with hex escapes. These are ordinary characters;
# they are printed before and after the problematic ones so the output also
# contains text that must come through unchanged.
base_string = '\n\x48\x65\x6c\x6c\x6f\x20\x4d\x65\x73\x6f\x6e\n'
# Create invalid input from all known unencodable chars
invalid_string = (
    '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x11'
    '\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f'
    '\x80\x81\x82\x83\x84\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f'
    '\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e'
    '\x9f\ufdd0\ufdd1\ufdd2\ufdd3\ufdd4\ufdd5\ufdd6\ufdd7\ufdd8'
    '\ufdd9\ufdda\ufddb\ufddc\ufddd\ufdde\ufddf\ufde0\ufde1'
    '\ufde2\ufde3\ufde4\ufde5\ufde6\ufde7\ufde8\ufde9\ufdea'
    '\ufdeb\ufdec\ufded\ufdee\ufdef\ufffe\uffff')
# Code points above the BMP are appended only when the interpreter can
# represent them.
if sys.maxunicode >= 0x10000:
    invalid_string = invalid_string + (
        '\U0001fffe\U0001ffff\U0002fffe\U0002ffff'
        '\U0003fffe\U0003ffff\U0004fffe\U0004ffff'
        '\U0005fffe\U0005ffff\U0006fffe\U0006ffff'
        '\U0007fffe\U0007ffff\U0008fffe\U0008ffff'
        '\U0009fffe\U0009ffff\U000afffe\U000affff'
        '\U000bfffe\U000bffff\U000cfffe\U000cffff'
        '\U000dfffe\U000dffff\U000efffe\U000effff'
        '\U000ffffe\U000fffff\U0010fffe\U0010ffff')

# Emit the valid framing text together with the invalid characters; the base
# string used to be built but never printed.
print(base_string + invalid_string + base_string)
51 changes: 51 additions & 0 deletions unittests/allplatformstests.py
Expand Up @@ -54,6 +54,7 @@

from mesonbuild.dependencies import PkgConfigDependency
from mesonbuild.build import Target, ConfigurationData, Executable, SharedLibrary, StaticLibrary
from mesonbuild import mtest
import mesonbuild.modules.pkgconfig
from mesonbuild.scripts import destdir_join

Expand Down Expand Up @@ -393,6 +394,56 @@ def test_static_compile_order(self):
self.assertTrue(compdb[3]['file'].endswith("libfile4.c"))
# FIXME: We don't have access to the linker command

def test_replace_unencodable_xml_chars(self):
    '''
    mtest.decode() must turn every unencodable xml char into its
    printable representation while leaving ordinary text untouched
    https://github.com/mesonbuild/meson/issues/9894
    '''
    # The same greeting twice: once spelled with hex escapes (fed to the
    # decoder) and once as plain text (expected back unchanged).
    escaped_greeting = '\n\x48\x65\x6c\x6c\x6f\x20\x4d\x65\x73\x6f\x6e\n'
    plain_greeting = '\nHello Meson\n'
    # All known unencodable chars inside the Basic Multilingual Plane
    bmp_chars = (
        '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x11'
        '\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f'
        '\x80\x81\x82\x83\x84\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f'
        '\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e'
        '\x9f\ufdd0\ufdd1\ufdd2\ufdd3\ufdd4\ufdd5\ufdd6\ufdd7\ufdd8'
        '\ufdd9\ufdda\ufddb\ufddc\ufddd\ufdde\ufddf\ufde0\ufde1'
        '\ufde2\ufde3\ufde4\ufde5\ufde6\ufde7\ufde8\ufde9\ufdea'
        '\ufdeb\ufdec\ufded\ufdee\ufdef\ufffe\uffff')
    # The ones beyond the BMP, only when the interpreter can hold them
    if sys.maxunicode >= 0x10000:
        astral_chars = (
            '\U0001fffe\U0001ffff\U0002fffe\U0002ffff'
            '\U0003fffe\U0003ffff\U0004fffe\U0004ffff'
            '\U0005fffe\U0005ffff\U0006fffe\U0006ffff'
            '\U0007fffe\U0007ffff\U0008fffe\U0008ffff'
            '\U0009fffe\U0009ffff\U000afffe\U000affff'
            '\U000bfffe\U000bffff\U000cfffe\U000cffff'
            '\U000dfffe\U000dffff\U000efffe\U000effff'
            '\U000ffffe\U000fffff\U0010fffe\U0010ffff')
    else:
        astral_chars = ''
    bad_chars = bmp_chars + astral_chars

    # repr() yields the escaped spelling wrapped in quotes; [1:-1] strips
    # the quotes.
    expected = plain_greeting + repr(bad_chars)[1:-1] + plain_greeting
    raw_bytes = (escaped_greeting + bad_chars + escaped_greeting).encode()
    self.assertEqual(mtest.decode(raw_bytes), expected)

def test_replace_unencodable_xml_chars_unit(self):
    '''
    Run the fixture project whose test prints unencodable xml chars and
    check that xmllint accepts the resulting JUnit log
    https://github.com/mesonbuild/meson/issues/9894
    '''
    xmllint_path = shutil.which('xmllint')
    if not xmllint_path:
        raise SkipTest('xmllint not installed.')
    fixture_dir = os.path.join(self.unit_test_dir, '110 replace unencodable xml chars')
    self.init(fixture_dir)
    self.run_tests()
    junit_report = Path(self.logdir, 'testlog.junit.xml')
    # check=True turns a non-zero xmllint exit status into a test failure.
    subprocess.run(['xmllint', junit_report], check=True)

def test_run_target_files_path(self):
'''
Test that run_targets are run from the correct directory
Expand Down

0 comments on commit fcb3684

Please sign in to comment.