Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Initial commit

  • Loading branch information...
commit 49371c0212afd6ffebac3d852940e2fd8b3dadbd 1 parent bfc7c33
@tj authored
Showing with 163 additions and 0 deletions.
  1. +73 −0 selector.inc
  2. +90 −0 test.selector.php
View
73 selector.inc
@@ -0,0 +1,73 @@
+<?php
+
+/**
+ * Select elements from an HTML string using a CSS selector.
+ *
+ * The selector is translated with selector_to_xpath() and evaluated against
+ * a DOMDocument built from $html.
+ *
+ * @param string $selector CSS selector, e.g. 'ul > li'
+ * @param string $html     HTML markup to search
+ * @return array One entry per matched element, in the shape produced by
+ *               element_to_array(). Empty when nothing matches, when $html
+ *               is empty, or when the generated XPath does not evaluate to
+ *               a node list.
+ */
+function select_elements($selector, $html) {
+    // loadHTML() rejects an empty string (a warning, or a ValueError on
+    // PHP 8), and an empty document cannot match anything anyway.
+    if ((string) $html === '') {
+        return array();
+    }
+
+    $dom = new DOMDocument();
+
+    // Real-world markup is rarely valid: collect libxml parse errors
+    // internally instead of emitting a PHP warning for each one, then put
+    // the caller's error-handling mode back the way it was.
+    $previous = libxml_use_internal_errors(true);
+    $dom->loadHTML($html);
+    if (!$previous) {
+        // Only discard the buffer when the caller was not collecting errors.
+        libxml_clear_errors();
+    }
+    libxml_use_internal_errors($previous);
+
+    $xpath = new DOMXPath($dom);
+    $nodes = $xpath->evaluate(selector_to_xpath($selector));
+
+    // evaluate() returns false for a malformed expression and may return a
+    // scalar for non-node expressions; only a node list can be converted.
+    if (!($nodes instanceof DOMNodeList)) {
+        return array();
+    }
+    return elements_to_array($nodes);
+}
+
+/**
+ * Convert a list of DOM nodes into an array of plain element arrays.
+ *
+ * Nodes whose type is not XML_ELEMENT_NODE are skipped.
+ *
+ * @param DOMNodeList $elements Nodes to convert
+ * @return array One element_to_array() result per element node, in
+ *               list order
+ */
+function elements_to_array($elements) {
+    $converted = array();
+    foreach ($elements as $node) {
+        if ($node->nodeType != XML_ELEMENT_NODE) {
+            continue;
+        }
+        $converted[] = element_to_array($node);
+    }
+    return $converted;
+}
+
+/**
+ * Convert a single DOM element into a plain nested array.
+ *
+ * @param DOMElement $element Element to convert
+ * @return array Array with the keys:
+ *               'name'       => the element's nodeName,
+ *               'attributes' => map of attribute name => attribute value,
+ *               'text'       => the element's textContent,
+ *               'children'   => child elements, converted recursively via
+ *                               elements_to_array()
+ */
+function element_to_array($element) {
+    $array = array(
+        'name' => $element->nodeName,
+        'attributes' => array(),
+        'text' => $element->textContent,
+        'children' => elements_to_array($element->childNodes)
+    );
+    // Iterate the DOMNamedNodeMap itself. Casting it to an array does not
+    // yield the attribute nodes, so 'attributes' was never populated with
+    // the element's real attributes.
+    if ($element->attributes !== null) {
+        foreach ($element->attributes as $attr) {
+            $array['attributes'][$attr->nodeName] = $attr->value;
+        }
+    }
+    return $array;
+}
+
+/**
+ * Translate a CSS selector into an XPath expression.
+ *
+ * The translation is a fixed sequence of textual rewrites, so the order of
+ * the rules below matters: later rules rely on the output of earlier ones,
+ * and the two str_replace() calls at the end clean up the '*' placeholders
+ * that the "bare" rules insert after an existing predicate.
+ *
+ * Known limitations of the textual approach:
+ *  - classes are matched with contains(@class, ...), so '.foo' also matches
+ *    an element whose class is 'foobar';
+ *  - the '.', '#' and whitespace rules run over the whole string, so
+ *    attribute values or :contains() text holding those characters are
+ *    rewritten as well.
+ *
+ * @param string $selector CSS selector, e.g. 'form#login > input[type=hidden]'
+ * @return string XPath expression starting with 'descendant-or-self::'
+ */
+function selector_to_xpath($selector) {
+    // :button, :submit, etc (also input:button, input:submit, ...)
+    // This runs BEFORE the axis prefix is added: the '::' of the prefix
+    // followed by an element name such as 'button' or 'textarea' would
+    // otherwise be mistaken for a ':button' / ':text' pseudo-class. An
+    // explicit leading 'input' is consumed so 'input:checkbox' does not
+    // turn into 'inputinput[...]'.
+    $selector = preg_replace('/(?:(?<![\w\-])input)?:(button|submit|file|checkbox|radio|image|reset|text|password)(?![\w\-])/', 'input[@type="\1"]', $selector);
+    $selector = 'descendant-or-self::' . $selector;
+    // foo[id] - attach the predicate to the element name that precedes it
+    $selector = preg_replace('/(?<=[\w\-*])\[([\w\-]+)\]/', '[@\1]', $selector);
+    // [id]
+    $selector = preg_replace('/\[([\w\-]+)\]/', '*[@\1]', $selector);
+    // foo[id=foo]
+    $selector = preg_replace('/\[([\w\-]+)=[\'"]?(.*?)[\'"]?\]/', '[@\1="\2"]', $selector);
+    // [id=foo]
+    $selector = str_replace(':[', ':*[', $selector);
+    // div#foo
+    $selector = preg_replace('/([\w\-]+)\#([\w\-]+)/', '\1[@id="\2"]', $selector);
+    // #foo
+    $selector = preg_replace('/\#([\w\-]+)/', '*[@id="\1"]', $selector);
+    // div.foo
+    $selector = preg_replace('/([\w\-]+)\.([\w\-]+)/', '\1[contains(@class,"\2")]', $selector);
+    // .foo
+    $selector = preg_replace('/\.([\w\-]+)/', '*[contains(@class,"\1")]', $selector);
+    // div:first-child
+    $selector = preg_replace('/([\w\-]+):first-child/', '*/\1[position()=1]', $selector);
+    // div:last-child
+    $selector = preg_replace('/([\w\-]+):last-child/', '*/\1[position()=last()]', $selector);
+    // :first-child
+    $selector = str_replace(':first-child', '*/*[position()=1]', $selector);
+    // :last-child
+    $selector = str_replace(':last-child', '*/*[position()=last()]', $selector);
+    // div:nth-child
+    $selector = preg_replace('/([\w\-]+):nth-child\((\d+)\)/', '*/\1[position()=\2]', $selector);
+    // :nth-child
+    $selector = preg_replace('/:nth-child\((\d+)\)/', '*/*[position()=\1]', $selector);
+    // :contains(Foo)
+    $selector = preg_replace('/([\w\-]+):contains\((.*?)\)/', '\1[contains(string(.),"\2")]', $selector);
+    // >
+    $selector = preg_replace('/\s*>\s*/', '/', $selector);
+    // ~
+    $selector = preg_replace('/\s*~\s*/', '/following-sibling::', $selector);
+    // +
+    $selector = preg_replace('/\s*\+\s*([\w\-]+)/', '/following-sibling::\1[position()=1]', $selector);
+    // ' '
+    $selector = preg_replace('/\s+/', '/descendant::', $selector);
+    // Drop the '*' (or '/*') placeholder a bare rule left directly after an
+    // existing predicate, e.g. 'div[...]*[...]' becomes 'div[...][...]'.
+    $selector = str_replace(']*', ']', $selector);
+    $selector = str_replace(']/*', ']', $selector);
+    return $selector;
+}
View
90 test.selector.php
@@ -0,0 +1,90 @@
+<?php
+
+include('selector.inc');
+
+/**
+ * Check how many elements a selector matches in the global $html fixture.
+ *
+ * Prints '.' when the number of matches equals $count, otherwise prints a
+ * failure message naming the selector, the expected and the actual count.
+ *
+ * @param string $selector CSS selector to run against $html
+ * @param int    $count    Expected number of matched elements
+ */
+function test_selector($selector, $count) {
+    global $html;
+    $matched = select_elements($selector, $html);
+    $actual = count($matched);
+    if ($actual == $count) {
+        print '.';
+        return;
+    }
+    print "\n '$selector' failed, expected $count but got $actual \n\n";
+}
+
+/**
+ * Check the XPath expression generated for a selector.
+ *
+ * Prints '.' when selector_to_xpath($selector) equals $expected, otherwise
+ * prints the selector together with the expected and the actual expression.
+ *
+ * @param string $selector CSS selector to translate
+ * @param string $expected XPath expression the translation should produce
+ */
+function test($selector, $expected) {
+    $actual = selector_to_xpath($selector);
+    if ($actual == $expected) {
+        print '.';
+        return;
+    }
+    print "\n '$selector' \n expected '$expected' \n but got '$actual'\n\n";
+}
+
+// Element names and the descendant combinator.
+test('foo', 'descendant-or-self::foo');
+test('foo bar', 'descendant-or-self::foo/descendant::bar');
+// A run of several spaces is still a single descendant combinator.
+// NOTE(review): this line was a byte-for-byte duplicate of the previous
+// case; it presumably carried extra whitespace originally - confirm.
+test('foo   bar', 'descendant-or-self::foo/descendant::bar');
+
+// Child combinator, with every placement of surrounding whitespace.
+test('foo > bar', 'descendant-or-self::foo/bar');
+test('foo >bar', 'descendant-or-self::foo/bar');
+test('foo>bar', 'descendant-or-self::foo/bar');
+test('foo> bar', 'descendant-or-self::foo/bar');
+
+// Ids, classes and attributes.
+test('div#foo', 'descendant-or-self::div[@id="foo"]');
+test('#foo', 'descendant-or-self::*[@id="foo"]');
+test('div.foo', 'descendant-or-self::div[contains(@class,"foo")]');
+test('.foo', 'descendant-or-self::*[contains(@class,"foo")]');
+test('[id]', 'descendant-or-self::*[@id]');
+test('[id=bar]', 'descendant-or-self::*[@id="bar"]');
+test('foo[id=bar]', 'descendant-or-self::foo[@id="bar"]');
+
+// Pseudo-classes.
+test(':button', 'descendant-or-self::input[@type="button"]');
+test(':submit', 'descendant-or-self::input[@type="submit"]');
+test(':first-child', 'descendant-or-self::*/*[position()=1]');
+test('div:first-child', 'descendant-or-self::*/div[position()=1]');
+test(':last-child', 'descendant-or-self::*/*[position()=last()]');
+test('div:last-child', 'descendant-or-self::*/div[position()=last()]');
+test(':nth-child(2)', 'descendant-or-self::*/*[position()=2]');
+test('div:nth-child(2)', 'descendant-or-self::*/div[position()=2]');
+test('foo + bar', 'descendant-or-self::foo/following-sibling::bar[position()=1]');
+test('li:contains(Foo)', 'descendant-or-self::li[contains(string(.),"Foo")]');
+
+// Combinations of the rules above.
+test('foo bar baz', 'descendant-or-self::foo/descendant::bar/descendant::baz');
+test('foo + bar + baz', 'descendant-or-self::foo/following-sibling::bar[position()=1]/following-sibling::baz[position()=1]');
+test('foo > bar > baz', 'descendant-or-self::foo/bar/baz');
+test('p ~ p ~ p', 'descendant-or-self::p/following-sibling::p/following-sibling::p');
+test('div#article p em', 'descendant-or-self::div[@id="article"]/descendant::p/descendant::em');
+test('div.foo:first-child', 'descendant-or-self::div[contains(@class,"foo")][position()=1]');
+test('form#login > input[type=hidden]._method', 'descendant-or-self::form[@id="login"]/input[@type="hidden"][contains(@class,"_method")]');
+
+// Fixture document that every test_selector() call below runs against
+// (test_selector() reads it through the global $html).
+$html = <<<HTML
+ <div id="article" class="block large">
+ <h2>Article Name</h2>
+ <p>Contents of article</p>
+ <ul>
+ <li>One</li>
+ <li>Two</li>
+ <li>Three</li>
+ <li>Four</li>
+ <li><a href="#">Five</a></li>
+ </ul>
+ </div>
+HTML;
+
+// Each entry is array(selector, number of elements expected to match).
+$expectations = array(
+    array('div', 1),
+    array('div#article', 1),
+    array('div#article.block', 1),
+    array('div#article.large.block', 1),
+    array('h2', 1),
+    array('div h2', 1),
+    array('div > h2', 1),
+    array('ul li a', 1),
+    array('ul > li > a', 1),
+    array('a[href=#]', 1),
+    array('a[href="#"]', 1),
+    array('div[id="article"]', 1),
+    array('h2:contains(Article)', 1),
+    array('h2:contains(Article) + p', 1),
+    array('h2:contains(Article) + p:contains(Contents)', 1),
+    array('li ~ li', 4),
+    array('li ~ li ~ li', 3),
+    array('li + li', 4),
+    array('li + li + li', 3),
+    array('li:first-child', 1),
+    array('li:last-child', 1),
+    array('li:contains(One):first-child', 1),
+    array('li:nth-child(2)', 1),
+    array('li:nth-child(3)', 1),
+    array('li:nth-child(4)', 1),
+    array('li:nth-child(6)', 0)
+);
+
+foreach ($expectations as $expectation) {
+    list($css, $matches) = $expectation;
+    test_selector($css, $matches);
+}
+
+// Finish the row of progress dots.
+print "\n";
Please sign in to comment.
Something went wrong with that request. Please try again.