Skip to content

Commit

Permalink
Fixed support for character class regular expressions in [attr=~regex…
Browse files Browse the repository at this point in the history
…] selector
  • Loading branch information
jhy committed Jul 31, 2010
1 parent ef1bbcb commit 98f3cce
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 18 deletions.
4 changes: 2 additions & 2 deletions CHANGES
Expand Up @@ -20,11 +20,11 @@ jsoup changelog
* Added support for tag names with - and _ (<abc_foo>, <abc-foo>)

* Improved HTML output format for empty elements and auto-detected self closing tags

* Implemented Node.ownerDocument DOM method

* Fixed support for character class regular expressions in [attr~=regex] selector

*** Release 1.2.2 [2010-Jul-11]

* Performance optimisation:
Expand Down
34 changes: 18 additions & 16 deletions src/main/java/org/jsoup/select/Selector.java
Expand Up @@ -153,7 +153,7 @@ private Elements findElements() {
return byClass();
} else if (tq.matchesWord()) {
return byTag();
} else if (tq.matchChomp("[")) {
} else if (tq.matches("[")) {
return byAttribute();
} else if (tq.matchChomp("*")) {
return allElements();
Expand Down Expand Up @@ -211,32 +211,34 @@ private Elements byTag() {
}

/**
 * Evaluates one attribute selector against {@code root} and returns the matching elements.
 * Handles a bare key ({@code [attr]}, {@code [^attrPrefix]}) and the key/value operators
 * {@code =}, {@code !=}, {@code ^=}, {@code $=}, {@code *=} and {@code ~=}.
 *
 * NOTE(review): this listing appears to be a rendered diff with its +/- markers stripped, so
 * each superseded statement that reads from {@code tq} is followed directly by its replacement
 * that reads from {@code cq}. Confirm against the real file before compiling.
 *
 * @return the elements selected by the attribute query
 * @throws SelectorParseException if the text after the key does not start with a known operator
 */
private Elements byAttribute() {
String key = tq.consumeToAny("=", "!=", "^=", "$=", "*=", "~=", "]"); // eq, not, start, end, contain, match, (no val)
// chompBalanced is expected to hand back the text between the matching [ ] pair, so a value
// that carries its own brackets (e.g. the regex character class in img[src~=[o]]) stays whole
// instead of being cut at the first "]". -- TODO confirm against TokenQueue.
TokenQueue cq = new TokenQueue(tq.chompBalanced('[', ']')); // content queue
String key = cq.consumeToAny("=", "!=", "^=", "$=", "*=", "~="); // eq, not, start, end, contain, match, (no val)
Validate.notEmpty(key);
cq.consumeWhitespace();

if (tq.matchChomp("]")) {
// Nothing left after the key: no operator or value was given. A key starting with "^"
// selects by attribute-name prefix; any other key selects by attribute presence.
if (cq.isEmpty()) {
return key.startsWith("^") ? root.getElementsByAttributeStarting(key.substring(1)) : root.getElementsByAttribute(key);
} else {
// For every operator below, the value is whatever remains in the content queue once the
// operator itself has been consumed.
if (tq.matchChomp("="))
return root.getElementsByAttributeValue(key, tq.chompTo("]"));
if (cq.matchChomp("="))
return root.getElementsByAttributeValue(key, cq.remainder());

else if (tq.matchChomp("!="))
return root.getElementsByAttributeValueNot(key, tq.chompTo("]"));
else if (cq.matchChomp("!="))
return root.getElementsByAttributeValueNot(key, cq.remainder());

else if (tq.matchChomp("^="))
return root.getElementsByAttributeValueStarting(key, tq.chompTo("]"));
else if (cq.matchChomp("^="))
return root.getElementsByAttributeValueStarting(key, cq.remainder());

else if (tq.matchChomp("$="))
return root.getElementsByAttributeValueEnding(key, tq.chompTo("]"));
else if (cq.matchChomp("$="))
return root.getElementsByAttributeValueEnding(key, cq.remainder());

else if (tq.matchChomp("*="))
return root.getElementsByAttributeValueContaining(key, tq.chompTo("]"));
else if (cq.matchChomp("*="))
return root.getElementsByAttributeValueContaining(key, cq.remainder());

else if (tq.matchChomp("~="))
return root.getElementsByAttributeValueMatching(key, tq.chompTo("]"));
else if (cq.matchChomp("~="))
return root.getElementsByAttributeValueMatching(key, cq.remainder());

else
throw new SelectorParseException("Could not parse attribute query '%s': unexpected token at '%s'", query, tq.remainder());
throw new SelectorParseException("Could not parse attribute query '%s': unexpected token at '%s'", query, cq.remainder());
}
}

Expand Down
8 changes: 8 additions & 0 deletions src/test/java/org/jsoup/select/SelectorTest.java
Expand Up @@ -116,6 +116,14 @@ public class SelectorTest {
assertEquals("3", imgs.get(2).id());
}

@Test public void testByAttributeRegexCharacterClass() {
    // The regex is a character class, so the selector has [ ] nested inside its own brackets.
    // Only foo.png (id 1) and old.gif (id 4) have an 'o' in their src.
    String html = "<p><img src=foo.png id=1><img src=bar.jpg id=2><img src=qux.JPEG id=3><img src=old.gif id=4></p>";
    Elements matched = Jsoup.parse(html).select("img[src~=[o]]");

    assertEquals(2, matched.size());
    assertEquals("1", matched.get(0).id());
    assertEquals("4", matched.get(1).id());
}

@Test public void testAllElements() {
String h = "<div><p>Hello</p><p><b>there</b></p></div>";
Document doc = Jsoup.parse(h);
Expand Down

0 comments on commit 98f3cce

Please sign in to comment.