Skip to content

Commit

Permalink
Issue #10 (#15)
Browse files Browse the repository at this point in the history
* upgraded batik-css version
removed redundant regex beginning/end markers
updated policies to contain new url validation which is sensitive to
html5 colon entity

* removed test code
  • Loading branch information
nahsra committed Sep 25, 2017
1 parent c48ed19 commit 82da009
Show file tree
Hide file tree
Showing 9 changed files with 33 additions and 16 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
<dependency>
<groupId>org.apache.xmlgraphics</groupId>
<artifactId>batik-css</artifactId>
<version>1.9</version>
<version>1.9.1</version>
</dependency>
<dependency>
<groupId>net.sourceforge.nekohtml</groupId>
Expand Down
13 changes: 4 additions & 9 deletions src/main/java/org/owasp/validator/html/Policy.java
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,6 @@ public class Policy {
public static final String ACTION_FILTER = "filter";
public static final String ACTION_TRUNCATE = "truncate";

private static char REGEXP_BEGIN = '^';
private static char REGEXP_END = '$';

private final Map<String, AntiSamyPattern> commonRegularExpressions;
protected final Map<String, Tag> tagRules;
private final Map<String, Property> cssRules;
Expand Down Expand Up @@ -588,7 +585,7 @@ private static List<Pattern> getAllowedRegexps(Map<String, AntiSamyPattern> comm
if (regExpName != null && regExpName.length() > 0) {
allowedRegExp.add(commonRegularExpressions1.get(regExpName).getPattern());
} else {
allowedRegExp.add(Pattern.compile(REGEXP_BEGIN + value + REGEXP_END));
allowedRegExp.add(Pattern.compile(value));
}
}
return allowedRegExp;
Expand Down Expand Up @@ -617,7 +614,7 @@ private static List<Pattern> getAllowedRegexps2(Map<String, AntiSamyPattern> com
}

} else if (value != null && value.length() > 0) {
allowedRegexps.add(Pattern.compile(REGEXP_BEGIN + value + REGEXP_END));
allowedRegexps.add(Pattern.compile(value));
}
}
return allowedRegexps;
Expand All @@ -634,16 +631,14 @@ private static List<Pattern> getAllowedRegexp3(Map<String, AntiSamyPattern> comm
if (pattern != null) {
allowedRegExp.add(pattern.getPattern());
} else if (value != null) {
allowedRegExp.add(Pattern.compile(REGEXP_BEGIN + value + REGEXP_END));
allowedRegExp.add(Pattern.compile(value));
} else {
throw new PolicyException("Regular expression '" + regExpName + "' was referenced as a common regexp in definition of '" + name + "', but does not exist in <common-regexp>");
}
}
return allowedRegExp;
}




private static void parseTagRules(Element root, Map<String, Attribute> commonAttributes1, Map<String, AntiSamyPattern> commonRegularExpressions1, Map<String, Tag> tagRules1) throws PolicyException {

if (root == null) return;
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/antisamy-anythinggoes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ http://www.w3.org/TR/html401/struct/global.html
<regexp name="htmlTitle" value="[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&amp;]*"/> <!-- force non-empty with a '+' at the end instead of '*' -->
<regexp name="htmlClass" value="[a-zA-Z0-9\s,\-_]+"/>

<regexp name="onsiteURL" value="([\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]+|\#(\w)+)"/>
<regexp name="onsiteURL" value="^(?![\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*(&amp;colon))[\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*"/>
<regexp name="offsiteURL" value="(\s)*((ht|f)tp(s?)://|mailto:)[\p{L}\p{N}]+[\p{L}\p{N}\p{Zs}\.\#@\$%\+&amp;;:\-_~,\?=/!\(\)]*(\s)*"/>

<regexp name="boolean" value="(true|false)"/>
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/antisamy-ebay.xml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ http://www.w3.org/TR/html401/struct/global.html
<regexp name="htmlTitle" value="[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&amp;]*"/> <!-- force non-empty with a '+' at the end instead of '*' -->
<regexp name="htmlClass" value="[a-zA-Z0-9\s,\-_]+"/>

<regexp name="onsiteURL" value="([\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]+|\#(\w)+)"/>
<regexp name="onsiteURL" value="^(?![\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*(&amp;colon))[\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*"/>
<regexp name="offsiteURL" value="(\s)*((ht|f)tp(s?)://|mailto:)[\p{L}\p{N}]+[\p{L}\p{N}\p{Zs}\.\#@\$%\+&amp;;:\-_~,\?=/!\(\)]*(\s)*"/>

<regexp name="boolean" value="(true|false)"/>
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/antisamy-myspace.xml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ http://www.w3.org/TR/html401/struct/global.html
<regexp name="htmlTitle" value="[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&amp;]*"/> <!-- force non-empty with a '+' at the end instead of '*' -->
<regexp name="htmlClass" value="[a-zA-Z0-9\s,\-_]+"/>

<regexp name="onsiteURL" value="([\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]+|\#(\w)+)"/>
<regexp name="onsiteURL" value="^(?![\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*(&amp;colon))[\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*"/>
<regexp name="offsiteURL" value="(\s)*((ht|f)tp(s?)://|mailto:)[\p{L}\p{N}]+[\p{L}\p{N}\p{Zs}\.\#@\$%\+&amp;;:\-_~,\?=/!\(\)]*(\s)*"/>

<regexp name="boolean" value="(true|false)"/>
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/antisamy-slashdot.xml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ Slashdot allowed tags taken from "Reply" page:
-->

<regexp name="htmlTitle" value="[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&amp;]*"/> <!-- force non-empty with a '+' at the end instead of '*' -->
<regexp name="onsiteURL" value="([\p{L}\p{N}\\/\.\?=\#&amp;;\-_~]+|\#(\w)+)"/>
<regexp name="onsiteURL" value="^(?![\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*(&amp;colon))[\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*"/>
<regexp name="offsiteURL" value="(\s)*((ht|f)tp(s?)://|mailto:)[\p{L}\p{N}]+[~\p{L}\p{N}\p{Zs}\-_\.@\#\$%&amp;;:,\?=/\+!\(\)]*(\s)*"/>

</common-regexps>
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/antisamy-tinymce.xml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

<!-- force non-empty with a '+' at the end instead of '*'
-->
<regexp name="onsiteURL" value="([\p{L}\p{N}\p{Zs}/\.\?=&amp;\-~])+" />
<regexp name="onsiteURL" value="^(?![\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*(&amp;colon))[\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*"/>

<!-- ([\w\\/\.\?=&amp;;\#-~]+|\#(\w)+)
-->
Expand Down
5 changes: 4 additions & 1 deletion src/main/resources/antisamy.xml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ http://www.w3.org/TR/html401/struct/global.html
<regexp name="htmlTitle" value="[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&amp;]*"/> <!-- force non-empty with a '+' at the end instead of '*' -->
<regexp name="htmlClass" value="[a-zA-Z0-9\s,\-_]+"/>

<regexp name="onsiteURL" value="([\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]+|\#(\w)+)"/>
<regexp name="onsiteURL" value="^(?![\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*(&amp;colon))[\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*"/>
<regexp name="anchoredURL" value="#(\w)+"/>
<regexp name="offsiteURL" value="(\s)*((ht|f)tp(s?)://|mailto:)[\p{L}\p{N}]+[\p{L}\p{N}\p{Zs}\.\#@\$%\+&amp;;:\-_~,\?=/!\(\)]*(\s)*"/>

<regexp name="boolean" value="(true|false)"/>
Expand Down Expand Up @@ -220,6 +221,7 @@ http://www.w3.org/TR/html401/struct/global.html
<attribute name="href">
<regexp-list>
<regexp name="onsiteURL"/>
<regexp name="anchoredURL"/>
<regexp name="offsiteURL"/>
</regexp-list>
</attribute>
Expand Down Expand Up @@ -640,6 +642,7 @@ http://www.w3.org/TR/html401/struct/global.html
<attribute name="usemap">
<regexp-list>
<regexp name="onsiteURL"/>
<regexp name="anchoredURL"/>
</regexp-list>
</attribute>

Expand Down
19 changes: 19 additions & 0 deletions src/test/java/org/owasp/validator/html/test/AntiSamyTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1173,6 +1173,25 @@ public void comparePatternSpeed() throws IOException, ScanException, PolicyExcep
System.out.println("match then replace: " + total2);
}

@Test
public void testOnsiteRegex() throws ScanException, PolicyException {
assertIsGoodOnsiteURL("foo");
assertIsGoodOnsiteURL("/foo/bar");
assertIsGoodOnsiteURL("../../di.cgi?foo&amp;3D~");
assertIsGoodOnsiteURL("/foo/bar/1/sdf;jsessiond=1f1f12312_123123");
}

void assertIsGoodOnsiteURL(String url) throws ScanException, PolicyException {
String html = as.scan("<a href=\"" + url + "\">X</a>", policy, AntiSamy.DOM).getCleanHTML();
assertTrue(html.contains("href=\""));
}

@Test
public void issue10() throws ScanException, PolicyException {
assertFalse(as.scan("<a href=\"javascript&colon;alert&lpar;1&rpar;\">X</a>", policy, AntiSamy.DOM).getCleanHTML().contains("javascript"));
assertFalse(as.scan("<a href=\"javascript&colon;alert&lpar;1&rpar;\">X</a>", policy, AntiSamy.SAX).getCleanHTML().contains("javascript"));
}

@Test
public void issue147() throws ScanException, PolicyException {
URL url = getClass().getResource("/antisamy-tinymce.xml");
Expand Down

0 comments on commit 82da009

Please sign in to comment.