Permalink
Browse files

Redo HTML sanitization to fix issue with attribute quoting.

  • Loading branch information...
andre-d authored and spladug committed Apr 2, 2014
1 parent a801fde commit 62bfa4ad673c4f19683ed91c5ebb093bbe9f581d
Showing with 155 additions and 67 deletions.
  1. +80 −66 html/html.c
  2. +75 −1 test_snudown.py
View
@@ -391,94 +391,108 @@ static void
/*
 * rndr_html_tag: writes '<', the tag name, a filtered list of attributes and
 * a closing '>' to `ob`, scanning the raw tag bytes held in `text`.
 *
 *   ob        - output buffer that receives the rebuilt tag
 *   text      - raw tag text; scanning starts just past '<' + tagname
 *   opaque    - not referenced by any line visible in this span
 *   tagname   - tag name written verbatim to `ob`
 *   whitelist - NULL-terminated array of attribute names allowed through
 *   tagtype   - compared against HTML_TAG_CLOSE to detect closing tags
 *
 * NOTE(review): this span looks like a diff hunk rendered without +/- markers,
 * so removed and added lines are interleaved (the size_t declaration, the scan
 * loop, the whitelist comparison and the reset block each appear twice). It is
 * not compilable as shown. Comments below describe what individual lines do;
 * any "pre-change"/"post-change" attribution is an inference to be confirmed
 * against the real file.
 */
rndr_html_tag(struct buf *ob, const struct buf *text, void *opaque,
char* tagname, char** whitelist, int tagtype)
{
/* NOTE(review): looks like the pre-change declarations -- `done` and `reset`
 * carry no initializer and `attr` is allocated right at its declaration. */
size_t i, x, z, in_str = 0, seen_equals = 0, done, reset;
struct buf *attr = bufnew(16);
/* NOTE(review): looks like the post-change declarations -- every flag starts
 * at 0 and the attribute name and value get separate buffers. */
size_t i, x, z, in_str = 0, seen_equals = 0, done = 0, done_attr = 0, reset = 0;
struct buf *attr;
struct buf *value;
char c;
bufputc(ob, '<');
i = 1 + strlen(tagname);
/* Closing tag: emit '/', the tag name and '>' and return without looking at
 * attributes. NOTE(review): `i += 1;` presumably belongs to the pre-change
 * version, which did not return early here. */
if(tagtype == HTML_TAG_CLOSE) {
bufputc(ob, '/');
i += 1;
bufputs(ob, tagname);
bufputc(ob, '>');
return;
}
bufputs(ob, tagname);
/* Start scanning after the '<' and the tag name. */
i = 1 + strlen(tagname);
/* NOTE(review): the loop below (through the first quote case) looks like the
 * pre-change scanner; it tracks quote state but keeps no separate value. */
if(tagtype != HTML_TAG_CLOSE) {
for(;i < text->size;i++) {
c = text->data[i];
done = 0;
reset = 0;
switch(c) {
case '>':
if(seen_equals && !in_str) {
done = 1;
reset = 1;
} else {
reset = 1;
}
break;
case '\'':
case '"':
if(!in_str)
in_str = c;
else if(in_str == c)
in_str = !in_str;
/* NOTE(review): from here on the lines look like the post-change scanner.
 * Both buffers are allocated after the closing-tag early return above. */
attr = bufnew(16);
value = bufnew(16);
/* The `!done` condition stops the scan after the first '>' is seen. */
for(; i < text->size && !done; i++) {
c = text->data[i];
done = 0;
reset = 0;
done_attr = 0;
switch(c) {
case '>':
/* NOTE(review): no in_str check here, so a '>' inside a quoted value also
 * ends the scan and the pending attribute is never emitted. */
done = 1;
break;
case '\'':
case '"':
/* A quote before any '=' discards the pending attribute; the first quote
 * after '=' is remembered in in_str; the same quote character again closes
 * the value and marks the attribute complete; the other quote character
 * inside an open value is kept as part of the value. */
if(!seen_equals) {
reset = 1;
} else if(!in_str) {
in_str = c;
} else if(in_str == c) {
in_str = 0;
done_attr = 1;
} else {
bufputc(value, c);
}
break;
case ' ':
/* A space is kept inside a quoted value and otherwise discards the
 * pending attribute. */
if (in_str) {
bufputc(value, ' ');
} else {
reset = 1;
}
break;
case '=':
/* NOTE(review): this test does not consult in_str, so an '=' inside a quoted
 * value presumably resets the attribute as well -- confirm this is intended. */
if(seen_equals) {
reset = 1;
break;
/* NOTE(review): the nested default/switch below looks like pre-change code
 * that validated the name against the whitelist as soon as '=' was seen. */
default:
if(!in_str) {
switch(c) {
case ' ':
if(seen_equals) {
done = 1;
reset = 1;
} else
reset = 1;
break;
case '=':
if(seen_equals) {
reset = 1;
} else {
for(z=0; whitelist[z]; z++) {
if(strlen(whitelist[z]) != attr->size)
continue;
for(x=0;x < attr->size; x++) {
if(tolower(whitelist[z][x]) != tolower(attr->data[x]))
break;
}
if(x == attr->size)
seen_equals = 1;
}
if(!seen_equals)
reset = 1;
}
break;
}
}
seen_equals = 1;
break;
default:
/* Any other byte goes to the name buffer before '=' and to the value buffer
 * only while inside quotes after '='; unquoted value bytes are dropped. */
if(seen_equals && in_str || !seen_equals) {
bufputc(seen_equals ? value : attr, c);
}
break;
}
if(done_attr) {
/* Case-insensitive, exact-length comparison of the collected name against
 * each whitelist entry, stopping at the first match.
 * NOTE(review): tolower() receives plain char values here; bytes >= 0x80 may
 * be negative on signed-char platforms -- consider an unsigned char cast. */
int valid = 0;
for(z = 0; whitelist[z]; z++) {
if(strlen(whitelist[z]) != attr->size) {
continue;
}
for(x = 0; x < attr->size; x++) {
if(tolower(whitelist[z][x]) != tolower(attr->data[x])) {
break;
}
}
if(x == attr->size) {
valid = 1;
break;
}
}
if(done) {
/* Emit ` name="value"` only for a whitelisted name when both name and value
 * are non-empty; both go through escape_html and the value is always written
 * inside double quotes. NOTE(review): the `if(done)` line above and the plain
 * bufput() of the name below look like pre-change lines. */
if(valid && value->size && attr->size) {
bufputc(ob, ' ');
bufput(ob, attr->data, attr->size);
escape_html(ob, attr->data, attr->size);
bufputs(ob, "=\"");
escape_html(ob, value->data, value->size);
bufputc(ob, '"');
}
reset = 1;
}
/* NOTE(review): this first reset block looks like the pre-change form, which
 * appended every non-reset byte to `attr`. */
if(reset) {
seen_equals = 0;
in_str = 0;
bufreset(attr);
} else {
bufputc(attr, c);
}
/* Reset clears the parse state and empties both the name and value buffers. */
if(reset) {
seen_equals = 0;
in_str = 0;
bufreset(attr);
bufreset(value);
}
}
/* Release both scratch buffers, then close the tag. */
bufrelease(attr);
bufrelease(value);
bufputc(ob, '>');
}
static int
View
@@ -163,10 +163,78 @@
'<p>/R/reddit.com</p>\n',
}
# Fixtures for the wiki renderer: each pair is (raw input, expected output).
# test_snudown() turns every pair into a SnudownTestCase constructed with
# renderer=snudown.RENDERER_WIKI.
wiki_cases = dict([
    ('<table scope="foo"bar>',
     '<p><table scope="foo"></p>\n'),
    ('<table scope="foo"bar colspan="2">',
     '<p><table scope="foo" colspan="2"></p>\n'),
    ('<table scope="foo" colspan="2"bar>',
     '<p><table scope="foo" colspan="2"></p>\n'),
    ('<table scope="foo">',
     '<p><table scope="foo"></p>\n'),
    ('<table scop="foo">',
     '<p><table></p>\n'),
    ('<table ff= scope="foo">',
     '<p><table scope="foo"></p>\n'),
    ('<table colspan= scope="foo">',
     '<p><table scope="foo"></p>\n'),
    ('<table scope=ff"foo">',
     '<p><table scope="foo"></p>\n'),
    ('<table scope="foo" test="test">',
     '<p><table scope="foo"></p>\n'),
    ('<table scope="foo" longervalue="testing test" scope="test">',
     '<p><table scope="foo" scope="test"></p>\n'),
    ('<table scope=`"foo">',
     '<p><table scope="foo"></p>\n'),
    ('<table scope="foo bar">',
     '<p><table scope="foo bar"></p>\n'),
    ('<table scope=\'foo colspan="foo">',
     '<p><table></p>\n'),
    ('<table scope=\'foo\' colspan="foo">',
     '<p><table scope="foo" colspan="foo"></p>\n'),
    ('<table scope=>',
     '<p><table></p>\n'),
    ('<table scope= colspan="test" scope=>',
     '<p><table colspan="test"></p>\n'),
    ('<table colspan="\'test">',
     '<p><table colspan="&#39;test"></p>\n'),
    ('<table scope="foo" colspan="2">',
     '<p><table scope="foo" colspan="2"></p>\n'),
    ('<table scope="foo" colspan="2" ff="test">',
     '<p><table scope="foo" colspan="2"></p>\n'),
    ('<table ff="test" scope="foo" colspan="2" colspan=>',
     '<p><table scope="foo" colspan="2"></p>\n'),
    (' <table colspan=\'\'\' a="" \' scope="foo">',
     '<p><table scope="foo"></p>\n'),
])
class SnudownTestCase(unittest.TestCase):
def __init__(self, renderer=snudown.RENDERER_USERTEXT):
self.renderer = renderer
unittest.TestCase.__init__(self)
def runTest(self):
output = snudown.markdown(self.input)
output = snudown.markdown(self.input, renderer=self.renderer)
for i, (a, b) in enumerate(zip(repr(self.expected_output),
repr(output))):
@@ -184,6 +252,12 @@ def runTest(self):
def test_snudown():
suite = unittest.TestSuite()
for input, expected_output in wiki_cases.iteritems():
case = SnudownTestCase(renderer=snudown.RENDERER_WIKI)
case.input = input
case.expected_output = expected_output
suite.addTest(case)
for input, expected_output in cases.iteritems():
case = SnudownTestCase()
case.input = input

0 comments on commit 62bfa4a

Please sign in to comment.