Skip to content

Commit

Permalink
Use ASAN to ensure there are no memory leaks when running the entire …
Browse files Browse the repository at this point in the history
…html5lib test suite

Because the normal tests run through Python, it was not possible to check
for leaks, since the Python interpreter itself leaks on exit. So create a
small C program to run the checks.
  • Loading branch information
kovidgoyal committed Jul 28, 2017
1 parent b28e922 commit 551729e
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 27 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,4 @@ install:

script:
- python $BUILDER test
- if [[ $BUILDER == "build.py" ]]; then python $BUILDER leak; fi
6 changes: 3 additions & 3 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,9 +285,9 @@ def main():
TEST_EXE, TEST_EXE, '-c', 'from html5_parser import *; ' + args.rest[0], *args.rest[1:])
elif args.action == 'leak':
build(args, build_leak_check=True)
p = subprocess.Popen([MEMLEAK_EXE], stdin=subprocess.PIPE)
p.communicate(('<p class="one">two<span>three</span>four' * 10).encode('utf-8'))
raise SystemExit(p.wait())
os.environ['MEMLEAK_EXE'] = os.path.abspath(MEMLEAK_EXE)
os.environ['ASAN_OPTIONS'] = 'leak_check_at_exit=0'
os.execlp(TEST_EXE, TEST_EXE, 'run_tests.py')


if __name__ == '__main__':
Expand Down
9 changes: 6 additions & 3 deletions mem-leak-check.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <libxml/xmlmemory.h>
#include "src/as-libxml.h"

static inline libxml_doc*
Expand Down Expand Up @@ -36,13 +37,15 @@ int main(int UNUSED argc, char UNUSED **argv) {
Options opts = {0};
opts.gumbo_opts = kGumboDefaultOptions;
opts.stack_size = 16 * 1024;
opts.gumbo_opts = kGumboDefaultOptions;
opts.gumbo_opts.max_errors = 0;
opts.namespace_elements = 1;
opts.keep_doctype = 1;
xmlInitParser();
ssize_t sz = read(STDIN_FILENO, buf, (sizeof(buf) / sizeof(buf[0])) - 1);
parse_with_options(buf, (size_t)sz, &opts);
opts.namespace_elements = 1;
opts.sanitize_names = 1;
opts.gumbo_opts.use_xhtml_rules = 1;
ssize_t sz = read(STDIN_FILENO, buf, (sizeof(buf) / sizeof(buf[0])) - 1);
parse_with_options(buf, (size_t)sz, &opts);
xmlCleanupParser();
return 0;
}
13 changes: 13 additions & 0 deletions run_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,21 @@ def find_tests():
return unittest.TestSuite(suites)


def run_memleak_tests():
    """Run only the ``asan_memleak`` test(s) and fail the process on error.

    The full suite from ``find_tests()`` is narrowed with
    ``filter_tests_by_name`` and executed with a very verbose text runner.

    Raises:
        SystemExit: with exit status 1 when the run was not successful.
    """
    suite = filter_tests_by_name(find_tests(), 'asan_memleak')
    runner = unittest.TextTestRunner(verbosity=4)
    outcome = runner.run(suite)
    if outcome.wasSuccessful():
        return
    raise SystemExit(1)


def main():
sys.path.insert(0, base)
if 'MEMLEAK_EXE' in os.environ:
return run_memleak_tests()
parser = argparse.ArgumentParser(
description='''\
Run the specified tests, or all tests if none are specified. Tests
Expand Down
75 changes: 54 additions & 21 deletions test/html5lib_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import codecs
import os
import re
import subprocess
import unittest

from html5_parser import check_bom, check_for_meta_charset, parse
Expand Down Expand Up @@ -55,6 +56,7 @@ def is_section_heading(self, line):
return False

def normalize(self, data):

def n(x):
if x.endswith('\n'):
x = x[:-1]
Expand Down Expand Up @@ -133,16 +135,21 @@ def serialize_node(node, level=1):

class BaseTest(TestCase):

@classmethod
def data_for_test(cls, test, expected='document'):
    """Pull the fields a test implementation needs out of one test record.

    Args:
        test: mapping for a single test case; read via ``.get``.
        expected: key under which the expected result is stored.

    Returns:
        A 4-tuple ``(inner_html, html, expected_value, errors)`` where
        ``errors`` is the ``'errors'`` entry split on newlines (``['']``
        when the entry is absent).
    """
    fragment = test.get('document-fragment')
    markup = test.get('data')
    wanted = test.get(expected)
    error_lines = test.get('errors', '').split('\n')
    return fragment, markup, wanted, error_lines

@classmethod
def add_single(cls, test_name, num, test, expected):
inner_html, html, expected, errors = cls.data_for_test(test, expected)

def test_func(
self,
inner_html=test.get('document-fragment'),
html=test.get('data'),
expected=test.get(expected),
errors=test.get('errors', '').split('\n')
):
self,
inner_html=inner_html,
html=html,
expected=expected,
errors=errors):
return self.implementation(inner_html, html, expected, errors, test_name)

test_func.__name__ = str('test_%s_%d' % (test_name, num))
Expand All @@ -151,28 +158,34 @@ def test_func(

class ConstructionTests(BaseTest):

def implementation(self, inner_html, html, expected, errors, test_name):
html = inner_html or html
@classmethod
def check_test(cls, inner_html, html, expected, errors, test_name):
if test_name == 'isindex' or html == '<!doctype html><isindex type="hidden">':
raise unittest.SkipTest('gumbo and html5lib differ on <isindex> parsing'
' and I cannot be bothered to figure out who is right')
return (
'gumbo and html5lib differ on <isindex> parsing'
' and I cannot be bothered to figure out who is right')
if test_name == 'menuitem-element':
raise unittest.SkipTest('gumbo and html5lib differ on <menuitem> parsing'
' and I cannot be bothered to figure out who is right')
return (
'gumbo and html5lib differ on <menuitem> parsing'
' and I cannot be bothered to figure out who is right')
noscript = re.search(r'^\| +<noscript>$', expected, flags=re.MULTILINE)
if noscript is not None:
raise unittest.SkipTest('<noscript> is always parsed with scripting off by gumbo')
return '<noscript> is always parsed with scripting off by gumbo'
if '<thisisasillytestelementnametomakesurecrazytagnamesareparsedcorrectly>' in expected:
raise unittest.SkipTest('gumbo unlike html5lib, does not lowercase unknown tag names')
return 'gumbo unlike html5lib, does not lowercase unknown tag names'
for line in errors:
if 'expected-doctype-name-but' in line or 'unknown-doctype' in line:
raise unittest.SkipTest('gumbo auto-corrects malformed doctypes')

return 'gumbo auto-corrects malformed doctypes'
if inner_html:
raise unittest.SkipTest('TODO: Implement fragment parsing')
else:
root = parse(html, namespace_elements=True, sanitize_names=False)
return 'TODO: Implement fragment parsing'

def implementation(self, inner_html, html, expected, errors, test_name):
html = inner_html or html
bad = self.check_test(inner_html, html, expected, errors, test_name)
if bad is not None:
raise unittest.SkipTest(bad)

root = parse(html, namespace_elements=True, sanitize_names=False)
output = serialize_construction_output(root)

# html5lib doesn't yet support the template tag, but it appears in the
Expand All @@ -196,8 +209,8 @@ def implementation(self, inner_html, html, expected, errors, test_name):
raise unittest.SkipTest('buggy html5lib test')
raw = html.encode('utf-8')
output = check_bom(raw) or check_for_meta_charset(raw) or 'windows-1252'
error_msg = '\n'.join(map(type(''), [
'\n\nInput:', html, '\nExpected:', expected, '\nReceived:', output]))
error_msg = '\n'.join(
map(type(''), ['\n\nInput:', html, '\nExpected:', expected, '\nReceived:', output]))
self.ae(expected.lower(), output, error_msg + '\n')


Expand All @@ -217,6 +230,26 @@ def load_suite(group, case_class, expected='document', data_class=TestData):
return unittest.defaultTestLoader.loadTestsFromTestCase(case_class)


class MemLeak(BaseTest):
    """Feed every usable tree-construction test input to the leak-check binary."""

    # The skip condition is evaluated once, when the class body is executed.
    @unittest.skipUnless('MEMLEAK_EXE' in os.environ, 'memleak check exe not available')
    def test_asan_memleak(self):
        """Run ``$MEMLEAK_EXE`` on each test's HTML and require exit status 0.

        Test cases for which ``ConstructionTests.check_test`` reports a
        reason (anything other than ``None``) are silently passed over.
        """
        checker = os.environ['MEMLEAK_EXE']
        child_env = os.environ.copy()
        # Drop any inherited ASAN_OPTIONS for the child process
        # NOTE(review): presumably so a leak-check-disabling setting from
        # the parent does not reach the checker -- confirm.
        child_env.pop('ASAN_OPTIONS', None)
        for path in html5lib_test_files('tree-construction'):
            test_name = os.path.basename(path).rpartition('.')[0]
            for index, test in enumerate(TestData(path)):
                fragment, markup, expected, errors = ConstructionTests.data_for_test(test)
                skip_reason = ConstructionTests.check_test(
                    fragment, markup, expected, errors, test_name)
                if skip_reason is None:
                    proc = subprocess.Popen([checker], stdin=subprocess.PIPE, env=child_env)
                    proc.communicate(markup.encode('utf-8'))
                    self.ae(proc.wait(), 0, 'The test {}-{} failed'.format(test_name, index))


def find_tests():
    """Lazily yield the test suites provided by this module.

    Yields, in order: the tree-construction suite, the encoding suite,
    and the suite loaded from the ``MemLeak`` test case.
    """
    html5lib_groups = (
        ('tree-construction', ConstructionTests, {}),
        ('encoding', EncodingTests, {'expected': 'encoding'}),
    )
    for group, case_class, extra in html5lib_groups:
        yield load_suite(group, case_class, **extra)
    yield unittest.defaultTestLoader.loadTestsFromTestCase(MemLeak)

0 comments on commit 551729e

Please sign in to comment.